commit 597774b6e3bb876bbd585a8657b87b8681d67c51
parent ab38966272114c858f459328e694354f61dee2ea
Author: Steve Gattuso <steve@stevegattuso.me>
Date: Mon, 6 Nov 2023 11:11:12 +0100

add note for tz woes

Diffstat:
| M | forerad/scrapers/historical.py | | | 6 | ++++-- |
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/forerad/scrapers/historical.py b/forerad/scrapers/historical.py
@@ -154,8 +154,10 @@ class HistoricalTripArchive():
         ]]
         df = df.replace({np.nan: None})
 
-        # Localize timestamps, convert to UTC, then convert to UNIX epoch to
-        # make things speedier when querying in sqlite
+        # Timestamps in the archives are in NYC time but don't differentiate
+        # between EST and EDT. I'm admittedly punting here by assigning
+        # ambiguous times to NaT and then dropna'ing them below. At some point
+        # this will likely need to be revisited.
         df['started_at'] = pd.to_datetime(df['started_at'])\
             .dt.tz_localize(utils.TZ_NYC, ambiguous='NaT')\
             .dt.tz_convert(utils.TZ_UTC)