forerad

Utilities for collecting and analyzing Citibike data in Python
Log | Files | Refs | README

commit 597774b6e3bb876bbd585a8657b87b8681d67c51
parent ab38966272114c858f459328e694354f61dee2ea
Author: Steve Gattuso <steve@stevegattuso.me>
Date:   Mon,  6 Nov 2023 11:11:12 +0100

add note for tz woes

Diffstat:
M forerad/scrapers/historical.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/forerad/scrapers/historical.py b/forerad/scrapers/historical.py
@@ -154,8 +154,10 @@ class HistoricalTripArchive():
         ]]
         df = df.replace({np.nan: None})
 
-        # Localize timestamps, convert to UTC, then convert to UNIX epoch to
-        # make things speedier when querying in sqlite
+        # Timestamps in the archives are in NYC time but don't differentiate
+        # between EST and EDT. I'm admittedly punting here by assigning
+        # ambiguous times to NaT and then dropna'ing them below. At some point
+        # this will likely need to be revisited.
         df['started_at'] = pd.to_datetime(df['started_at'])\
             .dt.tz_localize(utils.TZ_NYC, ambiguous='NaT')\
             .dt.tz_convert(utils.TZ_UTC)