forerad

Utilities for collecting and analyzing with Citibike data in Python
Log | Files | Refs | README

commit 137dcd04ffb7d6c091f2a98b0e64892806e358ba
parent 430aff814eff63bde364c8db6439aaec13193014
Author: Steve Gattuso <steve@stevegattuso.me>
Date:   Sun,  5 Nov 2023 13:44:47 +0100

use unix timestamps for better speed

Diffstat:
Mforerad/persistence.py | 24++++++++++++++++++++++--
Mmigrations/20231030_01_HoM9F-create-the-initial-schema.sql | 4++--
2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/forerad/persistence.py b/forerad/persistence.py @@ -3,6 +3,10 @@ import pandas as pd import numpy as np import datetime import pathlib +import pytz + +TZ_NYC = pytz.timezone('America/New_York') +TZ_UTC = pytz.timezone('UTC') class SQLiteStore(): def __init__(self): @@ -14,7 +18,6 @@ class SQLiteStore(): Fetches a dataframe of all trips where started_at >= first_dt and started_at < last_dt. Note that both columns are stored in UTC. """ - query = """ SELECT * FROM historical_trips @@ -23,7 +26,15 @@ class SQLiteStore(): started_at < ? """ - return pd.read_sql(query, self.db, params=(first_dt, last_dt)) + # Localize, convert, and format as UNIX epoch + start = TZ_NYC.localize(first_dt)\ + .astimezone(TZ_UTC)\ + .strftime('%s') + end = TZ_NYC.localize(last_dt)\ + .astimezone(TZ_UTC)\ + .strftime('%s') + + return pd.read_sql(query, self.db, params=(start, end)) def __store_formatted(self, df): """ @@ -36,6 +47,15 @@ class SQLiteStore(): ]] df = df.replace({np.nan: None}) + # Localize timestamps, convert to UTC, then convert to UNIX epoch to + # make things speedier when querying in sqlite + df['started_at'] = pd.to_datetime(df['started_at'])\ + .dt.tz_localize(TZ_UTC)\ + .astype(int) / 10**9 + df['ended_at'] = pd.to_datetime(df['ended_at'])\ + .dt.tz_localize(TZ_UTC)\ + .astype(int) / 10**9 + df.to_sql( name="historical_trips", con=self.db, diff --git a/migrations/20231030_01_HoM9F-create-the-initial-schema.sql b/migrations/20231030_01_HoM9F-create-the-initial-schema.sql @@ -3,8 +3,8 @@ CREATE TABLE historical_trips ( ride_id INTEGER PRIMARY KEY, rideable_type TEXT, - started_at TEXT NOT NULL, - ended_at TEXT NOT NULL, + started_at NUMERIC NOT NULL, + ended_at NUMERIC NOT NULL, start_lat NUMERIC, start_lng NUMERIC, end_lat NUMERIC,