commit ee68047881ac7e16ca5051a7a90abbca64efd08d parent 597774b6e3bb876bbd585a8657b87b8681d67c51 Author: Steve Gattuso <steve@stevegattuso.me> Date: Mon, 6 Nov 2023 11:31:26 +0100 fix missing months Diffstat:
| M | bin/hourly-volume-rollup | | | 7 | ++++--- |
| M | forerad/scrapers/historical.py | | | 3 | ++- |
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/bin/hourly-volume-rollup b/bin/hourly-volume-rollup @@ -58,10 +58,11 @@ def main__populate(month_str): year, month = utils.parse_month_str(month_str) archives = [a for a in archives if a.date.year == year and a.date.month == month] - # Filter out completed rollups - archives = [a for a in archives if not is_complete(a)] - utils.logger.info(f'Rolling up {len(archives)} months of data') + if not month_str: + # Filter out completed rollups + archives = [a for a in archives if not is_complete(a)] + utils.logger.info(f'Rolling up {len(archives)} months of data') [derive_rollup(a) for a in archives] diff --git a/forerad/scrapers/historical.py b/forerad/scrapers/historical.py @@ -11,7 +11,8 @@ from botocore import UNSIGNED import forerad.utils as utils -ARCHIVE_REGEX = re.compile("^([0-9]{4})([0-9]{2})-citibike-tripdata((.zip$)|(.csv.zip$))") +# Notice the `(i)?` - they spelled Citibike wrong in one of the archives... lol +ARCHIVE_REGEX = re.compile("^([0-9]{4})([0-9]{2})-cit(i)?bike-tripdata((.zip$)|(.csv.zip$))") CACHE_DIR = pathlib.Path(__file__).parent.parent.parent / pathlib.Path('.forerad-cache') TRIP_BUCKET = 'tripdata' if not CACHE_DIR.exists():