commit 91bef797d36904fe60460034cf4b83ba996be28a
parent ab7a0af09e6965e2a489ef4986278460f949444f
Author: Steve Gattuso <steve@stevegattuso.me>
Date: Sun, 5 Nov 2023 17:07:13 +0100
properly detect if data is missing
Diffstat:
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/bin/scraper b/bin/scraper
@@ -5,6 +5,7 @@ This script allows you to download citibike ride history archives into your loca
import sys
import datetime
import argparse
+import pandas as pd
import forerad.persistence as persistence
import forerad.utils as utils
@@ -18,14 +19,18 @@ def is_persisted(archive: scrape_historical.MonthlyArchive) -> bool:
"""
Returns whether or not an archive is already persisted in the database.
"""
- first_dt = datetime.datetime.combine(archive.date, datetime.datetime.min.time())
-
- trips = store.fetch_trips(
- first_dt=first_dt,
- last_dt=first_dt + datetime.timedelta(days=1)
+ next_month = (archive.date.replace(day=1) + datetime.timedelta(days=32)).replace(day=1)
+ all_days = pd.date_range(
+ start=archive.date,
+ end=next_month - datetime.timedelta(days=1),
+ freq="D"
)
- return len(trips) > 0
+ for date in all_days:
+ if store.fetch_daily_volume(date) == 0:
+ return False
+
+ return True
def main__fetch(args: argparse.Namespace):
diff --git a/forerad/persistence.py b/forerad/persistence.py
@@ -129,7 +129,6 @@ class SQLiteStore():
end_dt = self.__localize_date(date + datetime.timedelta(days=1))
result = pd.read_sql(query, self.db, params=(start_dt, end_dt))
-
return result.iat[0, 1]
def fetch_daily_volume_rollup(self) -> pd.DataFrame: