commit 137dcd04ffb7d6c091f2a98b0e64892806e358ba
parent 430aff814eff63bde364c8db6439aaec13193014
Author: Steve Gattuso <steve@stevegattuso.me>
Date: Sun, 5 Nov 2023 13:44:47 +0100
use unix timestamps for better speed
Diffstat:
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/forerad/persistence.py b/forerad/persistence.py
@@ -3,6 +3,10 @@ import pandas as pd
import numpy as np
import datetime
import pathlib
+import pytz
+
+TZ_NYC = pytz.timezone('America/New_York')
+TZ_UTC = pytz.timezone('UTC')
class SQLiteStore():
def __init__(self):
@@ -14,7 +18,6 @@ class SQLiteStore():
Fetches a dataframe of all trips where started_at >= first_dt and
started_at < last_dt. Note that both columns are stored in UTC.
"""
-
query = """
SELECT *
FROM historical_trips
@@ -23,7 +26,15 @@ class SQLiteStore():
started_at < ?
"""
- return pd.read_sql(query, self.db, params=(first_dt, last_dt))
+ # Localize, convert, and format as UNIX epoch
+ start = TZ_NYC.localize(first_dt)\
+ .astimezone(TZ_UTC)\
+ .strftime('%s')
+ end = TZ_NYC.localize(last_dt)\
+ .astimezone(TZ_UTC)\
+ .strftime('%s')
+
+ return pd.read_sql(query, self.db, params=(start, end))
def __store_formatted(self, df):
"""
@@ -36,6 +47,15 @@ class SQLiteStore():
]]
df = df.replace({np.nan: None})
+ # Localize timestamps, convert to UTC, then convert to UNIX epoch to
+ # make things speedier when querying in sqlite
+ df['started_at'] = pd.to_datetime(df['started_at'])\
+ .dt.tz_localize(TZ_UTC)\
+ .astype(int) / 10**9
+ df['ended_at'] = pd.to_datetime(df['ended_at'])\
+ .dt.tz_localize(TZ_UTC)\
+ .astype(int) / 10**9
+
df.to_sql(
name="historical_trips",
con=self.db,
diff --git a/migrations/20231030_01_HoM9F-create-the-initial-schema.sql b/migrations/20231030_01_HoM9F-create-the-initial-schema.sql
@@ -3,8 +3,8 @@
CREATE TABLE historical_trips (
ride_id INTEGER PRIMARY KEY,
rideable_type TEXT,
- started_at TEXT NOT NULL,
- ended_at TEXT NOT NULL,
+ started_at NUMERIC NOT NULL,
+ ended_at NUMERIC NOT NULL,
start_lat NUMERIC,
start_lng NUMERIC,
end_lat NUMERIC,