forerad

Utilities for collecting and analyzing Citibike data in Python
Log | Files | Refs | README

commit 91bef797d36904fe60460034cf4b83ba996be28a
parent ab7a0af09e6965e2a489ef4986278460f949444f
Author: Steve Gattuso <steve@stevegattuso.me>
Date:   Sun,  5 Nov 2023 17:07:13 +0100

properly detect if data is missing

Diffstat:
Mbin/scraper | 17+++++++++++------
Mforerad/persistence.py | 1-
2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/bin/scraper b/bin/scraper @@ -5,6 +5,7 @@ This script allows you to download citibike ride history archives into your loca import sys import datetime import argparse +import pandas as pd import forerad.persistence as persistence import forerad.utils as utils @@ -18,14 +19,18 @@ def is_persisted(archive: scrape_historical.MonthlyArchive) -> bool: """ Returns whether or not an archive is already persisted in the database. """ - first_dt = datetime.datetime.combine(archive.date, datetime.datetime.min.time()) - - trips = store.fetch_trips( - first_dt=first_dt, - last_dt=first_dt + datetime.timedelta(days=1) + next_month = (archive.date.replace(day=1) + datetime.timedelta(days=32)).replace(day=1) + all_days = pd.date_range( + start=archive.date, + end=next_month - datetime.timedelta(days=1), + freq="D" ) - return len(trips) > 0 + for date in all_days: + if store.fetch_daily_volume(date) == 0: + return False + + return True def main__fetch(args: argparse.Namespace): diff --git a/forerad/persistence.py b/forerad/persistence.py @@ -129,7 +129,6 @@ class SQLiteStore(): end_dt = self.__localize_date(date + datetime.timedelta(days=1)) result = pd.read_sql(query, self.db, params=(start_dt, end_dt)) - return result.iat[0, 1] def fetch_daily_volume_rollup(self) -> pd.DataFrame: