From 8662cb459269edb28e6ef9dd34fb36fb19e73f63 Mon Sep 17 00:00:00 2001 From: Jan Novak Date: Wed, 11 Mar 2026 01:16:00 +0100 Subject: [PATCH 1/4] feat: implement caching for google sheets data - Add cache_utils.py with JSON caching for Google Sheets - Authenticate and cache Drive/Sheets API services globally to reuse tokens - Use CACHE_SHEET_MAP dict to resolve cache names securely to Sheet IDs - Change app.py data fetching to skip downloads if modifiedTime matches cache - Replace global socket timeout with httplib2 to fix Werkzeug timeouts - Add VS Code attach debugpy configurations to launch.json and Makefile --- .vscode/launch.json | 33 +++++++ Makefile | 10 +- app.py | 63 ++++++++++--- scripts/cache_utils.py | 172 ++++++++++++++++++++++++++++++++++ scripts/match_payments.py | 4 +- scripts/sync_fio_to_sheets.py | 9 +- 6 files changed, 270 insertions(+), 21 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 scripts/cache_utils.py diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..8c6d99d --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,33 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Flask", + "type": "debugpy", + "request": "launch", + "module": "flask", + "python": "${workspaceFolder}/.venv/bin/python", + "env": { + "FLASK_APP": "app.py", + "FLASK_DEBUG": "1" + }, + "args": [ + "run", + "--no-debugger", + "--no-reload", + "--host", "0.0.0.0", + "--port", "5001" + ], + "jinja": true + }, + { + "name": "Python Debugger: Attach", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + } + } + ] +} \ No newline at end of file diff --git a/Makefile b/Makefile index a337296..5190434 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help fees match web image run sync sync-2026 test test-v docs +.PHONY: help fees match web web-debug image run sync sync-2026 test test-v docs export PYTHONPATH := scripts:$(PYTHONPATH) 
VENV := .venv @@ -15,8 +15,9 @@ help: @echo "Available targets:" @echo " make fees - Calculate monthly fees from the attendance sheet" @echo " make match - Match Fio bank payments against expected attendance fees" - @echo " make web - Start a dynamic web dashboard locally" - @echo " make image - Build an OCI container image" + @echo " make web - Start a dynamic web dashboard locally" + @echo " make web-debug - Start a dynamic web dashboard locally in debug mode" + @echo " make image - Build an OCI container image" @echo " make run - Run the built Docker image locally" @echo " make sync - Sync Fio transactions to Google Sheets" @echo " make sync-2025 - Sync Fio transactions for Q4 2025 (Oct-Dec)" @@ -40,6 +41,9 @@ match: $(PYTHON) web: $(PYTHON) $(PYTHON) app.py +web-debug: $(PYTHON) + FLASK_DEBUG=1 $(PYTHON) app.py + image: docker build -t fuj-management:latest -f build/Dockerfile . diff --git a/app.py b/app.py index 63b6de1..f7915b0 100644 --- a/app.py +++ b/app.py @@ -6,14 +6,43 @@ import time import os import io import qrcode +import logging from flask import Flask, render_template, g, send_file, request +# Configure logging, allowing override via LOG_LEVEL environment variable +log_level = os.environ.get("LOG_LEVEL", "INFO").upper() +logging.basicConfig(level=getattr(logging, log_level, logging.INFO), format='%(asctime)s - %(name)s:%(filename)s:%(lineno)d [%(funcName)s] - %(levelname)s - %(message)s') + # Add scripts directory to path to allow importing from it scripts_dir = Path(__file__).parent / "scripts" sys.path.append(str(scripts_dir)) from attendance import get_members_with_fees, get_junior_members_with_fees, SHEET_ID as ATTENDANCE_SHEET_ID, JUNIOR_SHEET_GID, ADULT_MERGED_MONTHS, JUNIOR_MERGED_MONTHS from match_payments import reconcile, fetch_sheet_data, fetch_exceptions, normalize, DEFAULT_SPREADSHEET_ID as PAYMENTS_SHEET_ID +from cache_utils import get_sheet_modified_time, read_cache, write_cache + +def get_cached_data(cache_key, sheet_id, fetch_func, 
*args, **kwargs): + mod_time = get_sheet_modified_time(cache_key) + if mod_time: + cached = read_cache(cache_key, mod_time) + if cached is not None: + return cached + data = fetch_func(*args, **kwargs) + if mod_time: + write_cache(cache_key, mod_time, data) + return data + +def get_cached_exceptions(sheet_id, creds_path): + cache_key = "exceptions_dict" + mod_time = get_sheet_modified_time(cache_key) + if mod_time: + cached = read_cache(cache_key, mod_time) + if cached is not None: + return {tuple(k): v for k, v in cached} + data = fetch_exceptions(sheet_id, creds_path) + if mod_time: + write_cache(cache_key, mod_time, [[list(k), v] for k, v in data.items()]) + return data def get_month_labels(sorted_months, merged_months): labels = {} @@ -78,10 +107,11 @@ def fees(): attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit" payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit" - members, sorted_months = get_members_with_fees() + members_data = get_cached_data("attendance_regular", ATTENDANCE_SHEET_ID, get_members_with_fees) record_step("fetch_members") - if not members: + if not members_data: return "No data." 
+ members, sorted_months = members_data # Filter to adults only for display results = [(name, fees) for name, tier, fees in members if tier == "A"] @@ -93,7 +123,7 @@ def fees(): # Get exceptions for formatting credentials_path = ".secret/fuj-management-bot-credentials.json" - exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_exceptions") formatted_results = [] @@ -135,10 +165,11 @@ def fees_juniors(): attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit#gid={JUNIOR_SHEET_GID}" payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit" - members, sorted_months = get_junior_members_with_fees() + members_data = get_cached_data("attendance_juniors", ATTENDANCE_SHEET_ID, get_junior_members_with_fees) record_step("fetch_junior_members") - if not members: + if not members_data: return "No data." + members, sorted_months = members_data # Sort members by name results = sorted([(name, fees) for name, tier, fees in members], key=lambda x: x[0]) @@ -150,7 +181,7 @@ def fees_juniors(): # Get exceptions for formatting (reusing payments sheet) credentials_path = ".secret/fuj-management-bot-credentials.json" - exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_exceptions") formatted_results = [] @@ -214,14 +245,15 @@ def reconcile_view(): # Use hardcoded credentials path for now, consistent with other scripts credentials_path = ".secret/fuj-management-bot-credentials.json" - members, sorted_months = get_members_with_fees() + members_data = get_cached_data("attendance_regular", ATTENDANCE_SHEET_ID, get_members_with_fees) record_step("fetch_members") - if not members: + if not members_data: return "No data." 
+ members, sorted_months = members_data - transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path) + transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_payments") - exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_exceptions") result = reconcile(members, sorted_months, transactions, exceptions) record_step("reconcile") @@ -306,14 +338,15 @@ def reconcile_juniors_view(): credentials_path = ".secret/fuj-management-bot-credentials.json" - junior_members, sorted_months = get_junior_members_with_fees() + junior_members_data = get_cached_data("attendance_juniors", ATTENDANCE_SHEET_ID, get_junior_members_with_fees) record_step("fetch_junior_members") - if not junior_members: + if not junior_members_data: return "No data." + junior_members, sorted_months = junior_members_data - transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path) + transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_payments") - exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_exceptions") # Adapt junior tuple format (name, tier, {month: (fee, total_count, adult_count, junior_count)}) @@ -414,7 +447,7 @@ def payments(): payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit" credentials_path = ".secret/fuj-management-bot-credentials.json" - transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path) + transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_payments") # Group transactions by person diff --git a/scripts/cache_utils.py 
b/scripts/cache_utils.py new file mode 100644 index 0000000..2fa1bec --- /dev/null +++ b/scripts/cache_utils.py @@ -0,0 +1,172 @@ +import json +import os +import socket +import logging +from datetime import datetime +from pathlib import Path +from google.oauth2 import service_account +from googleapiclient.discovery import build + +logger = logging.getLogger(__name__) + +# Constants +CACHE_DIR = Path(__file__).parent.parent / "tmp" +CREDS_PATH = Path(__file__).parent.parent / ".secret" / "fuj-management-bot-credentials.json" +DRIVE_TIMEOUT = 10 # seconds +CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 1800)) # 30 min default for max cache age +CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default + +# Known mappings mapping "cache name" to Google Sheet ID +CACHE_SHEET_MAP = { + "attendance_regular": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA", + "attendance_juniors": "1wXm4gB0rW_LCHgLhCqg0Rk-pGkP5xKIf14dO3D3Z_g4", + "exceptions_dict": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y", + "transactions_ledger": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y" +} + +# Global state to track last Drive API check time per sheet +_LAST_CHECKED = {} +_DRIVE_SERVICE = None + +def _get_drive_service(): + global _DRIVE_SERVICE + if _DRIVE_SERVICE is not None: + return _DRIVE_SERVICE + + if not CREDS_PATH.exists(): + logger.warning(f"Credentials not found at {CREDS_PATH}. 
Cannot check Google Drive API.") + return None + + try: + creds = service_account.Credentials.from_service_account_file( + str(CREDS_PATH), + scopes=["https://www.googleapis.com/auth/drive.readonly"] + ) + + # Apply timeout safely to the httplib2 connection without mutating global socket + import httplib2 + import google_auth_httplib2 + http = httplib2.Http(timeout=DRIVE_TIMEOUT) + http = google_auth_httplib2.AuthorizedHttp(creds, http=http) + + _DRIVE_SERVICE = build("drive", "v3", http=http, cache_discovery=False) + return _DRIVE_SERVICE + except Exception as e: + logger.error(f"Failed to build Drive API service: {e}") + return None + +import time + +def get_sheet_modified_time(cache_key: str) -> str | None: + """Gets the modifiedTime from Google Drive API for a given cache_key. + Returns the ISO timestamp string if successful. + If the Drive API fails (e.g., lack of permissions for public sheets), + it generates a virtual time bucket string to provide a 5-minute TTL cache. + """ + sheet_id = CACHE_SHEET_MAP.get(cache_key, cache_key) + + cache_file = CACHE_DIR / f"{cache_key}_cache.json" + + # 1. Check if we should skip the Drive API check entirely (global memory TTL) + now = time.time() + last_check = _LAST_CHECKED.get(sheet_id, 0) + + if CACHE_API_CHECK_TTL_SECONDS > 0 and (now - last_check) < CACHE_API_CHECK_TTL_SECONDS: + # We checked recently. Do we have a valid cache file? 
+ if cache_file.exists(): + try: + # Still respect the older, broader CACHE_TTL_SECONDS + file_mtime = os.path.getmtime(cache_file) + if CACHE_TTL_SECONDS <= 0 or (now - file_mtime) < CACHE_TTL_SECONDS: + with open(cache_file, "r", encoding="utf-8") as f: + cache_data = json.load(f) + cached_time = cache_data.get("modifiedTime") + if cached_time: + logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_API_CHECK_TTL_SECONDS}s API check TTL") + return cached_time + except Exception as e: + logger.warning(f"Error reading existing cache during API skip for {sheet_id}: {e}") + + # 2. Check if the cache file is simply too new (legacy check) + if CACHE_TTL_SECONDS > 0 and cache_file.exists(): + try: + file_mtime = os.path.getmtime(cache_file) + if time.time() - file_mtime < CACHE_TTL_SECONDS: + with open(cache_file, "r", encoding="utf-8") as f: + cache_data = json.load(f) + cached_time = cache_data.get("modifiedTime") + if cached_time: + logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_TTL_SECONDS}s max CACHE_TTL") + # We consider this a valid check, update the global state + _LAST_CHECKED[sheet_id] = now + return cached_time + except Exception as e: + logger.warning(f"Error checking cache TTL for {sheet_id}: {e}") + + def _fallback_ttl(): + bucket = int(time.time() // 300) + return f"ttl-5m-{bucket}" + + logger.info(f"Checking Drive API for {sheet_id}") + drive_service = _get_drive_service() + if not drive_service: + return _fallback_ttl() + + try: + file_meta = drive_service.files().get(fileId=sheet_id, fields="modifiedTime", supportsAllDrives=True).execute() + # Successfully checked API, update the global state + _LAST_CHECKED[sheet_id] = time.time() + return file_meta.get("modifiedTime") + except Exception as e: + logger.warning(f"Could not get modifiedTime for sheet {sheet_id}: {e}. 
Falling back to 5-minute TTL.") + return _fallback_ttl() + +def read_cache(sheet_id: str, current_modified_time: str) -> list | dict | None: + """Reads the JSON cache for the given sheet_id. + Returns the cached data if it exists AND the cached modifiedTime matches + current_modified_time. + Otherwise, returns None. + """ + if not current_modified_time: + return None + + cache_file = CACHE_DIR / f"{sheet_id}_cache.json" + if not cache_file.exists(): + return None + + try: + with open(cache_file, "r", encoding="utf-8") as f: + cache_data = json.load(f) + + cached_time = cache_data.get("modifiedTime") + if cached_time == current_modified_time: + logger.info(f"Cache hit for {sheet_id} ({current_modified_time})") + return cache_data.get("data") + else: + logger.info(f"Cache miss for {sheet_id}. Cached: {cached_time}, Current: {current_modified_time}") + return None + except Exception as e: + logger.error(f"Failed to read cache {cache_file}: {e}") + return None + +def write_cache(sheet_id: str, modified_time: str, data: list | dict) -> None: + """Writes the data to a JSON cache file with the given modified_time.""" + if not modified_time: + return + + try: + CACHE_DIR.mkdir(parents=True, exist_ok=True) + cache_file = CACHE_DIR / f"{sheet_id}_cache.json" + + cache_data = { + "modifiedTime": modified_time, + "data": data, + "cachedAt": datetime.now().isoformat() + } + + with open(cache_file, "w", encoding="utf-8") as f: + json.dump(cache_data, f, ensure_ascii=False) + + logger.info(f"Wrote cache for {sheet_id}") + except Exception as e: + logger.error(f"Failed to write cache {sheet_id}: {e}") diff --git a/scripts/match_payments.py b/scripts/match_payments.py index c400a65..5d426d9 100644 --- a/scripts/match_payments.py +++ b/scripts/match_payments.py @@ -300,8 +300,8 @@ def reconcile( norm_name = normalize(name) norm_period = normalize(m) fee_data = member_fees[name].get(m, (0, 0)) - original_expected = fee_data[0] if isinstance(fee_data, tuple) else fee_data - 
attendance_count = fee_data[1] if isinstance(fee_data, tuple) else 0 + original_expected = fee_data[0] if isinstance(fee_data, (tuple, list)) else fee_data + attendance_count = fee_data[1] if isinstance(fee_data, (tuple, list)) else 0 ex_data = exceptions.get((norm_name, norm_period)) if ex_data is not None: diff --git a/scripts/sync_fio_to_sheets.py b/scripts/sync_fio_to_sheets.py index 42ec9d7..652f315 100644 --- a/scripts/sync_fio_to_sheets.py +++ b/scripts/sync_fio_to_sheets.py @@ -19,8 +19,14 @@ DEFAULT_SPREADSHEET_ID = "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y" SCOPES = ["https://www.googleapis.com/auth/spreadsheets"] TOKEN_FILE = "token.pickle" COLUMN_LABELS = ["Date", "Amount", "manual fix", "Person", "Purpose", "Inferred Amount", "Sender", "VS", "Message", "Bank ID", "Sync ID"] +_SHEETS_SERVICE = None + def get_sheets_service(credentials_path: str): """Authenticate and return the Google Sheets API service.""" + global _SHEETS_SERVICE + if _SHEETS_SERVICE is not None: + return _SHEETS_SERVICE + if not os.path.exists(credentials_path): raise FileNotFoundError(f"Credentials file not found: {credentials_path}") @@ -50,7 +56,8 @@ def get_sheets_service(credentials_path: str): with open(TOKEN_FILE, "wb") as token: pickle.dump(creds, token) - return build("sheets", "v4", credentials=creds) + _SHEETS_SERVICE = build("sheets", "v4", credentials=creds) + return _SHEETS_SERVICE def generate_sync_id(tx: dict) -> str: From 76cdcba4245e6754b9295c574a900b3483e40921 Mon Sep 17 00:00:00 2001 From: Jan Novak Date: Wed, 11 Mar 2026 01:18:00 +0100 Subject: [PATCH 2/4] docs: add caching outcomes summary to prompts directory --- ...026-03-10-cache-data-from-google-sheets.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 prompts/outcomes/2026-03-10-cache-data-from-google-sheets.md diff --git a/prompts/outcomes/2026-03-10-cache-data-from-google-sheets.md b/prompts/outcomes/2026-03-10-cache-data-from-google-sheets.md new file mode 100644 index 
0000000..024c5a3 --- /dev/null +++ b/prompts/outcomes/2026-03-10-cache-data-from-google-sheets.md @@ -0,0 +1,29 @@ +# Google Sheets Data Caching Implementation + +**Date:** 2026-03-11 +**Objective:** Optimize Flask application performance by heavily caching expensive Google Sheets data processing, avoiding redundant HTTP roundtrips to Google APIs, and ensuring rate limits are not exhausted during simple web app reloads. + +## Implemented Features + +### 1. File-Based JSON Caching (`cache_utils.py`) +- **Mechanism:** Implemented a new generic caching system that saves API responses and heavily calculated datasets as `.json` files directly to the project-local `tmp/` directory. +- **Drive Metadata Checks:** The cache is validated by asking the Google Drive API (`drive.files().get`) for the remote `modifiedTime` of the target Sheet. +- **Cache Hit logic:** If the cached version on disk matches the remote `modifiedTime`, the application skips downloading the full CSV payload and computing tuples—instead serving the instant static cache via `json.load`. + +### 2. Global API Auth Object Reuse +- **The Problem:** The `_get_drive_service()` and `get_sheets_service()` implementations were completely rebuilding `googleapiclient.discovery` objects for *every single file check*—re-seeking and exchanging Google Service Account tokens constantly. +- **The Fix:** Service objects (`_DRIVE_SERVICE`, `_SHEETS_SERVICE`) are now globally cached in application memory. The server authenticates exactly *once* when it wakes up, dramatically saving milliseconds and network resources across every web request. The underlying `httplib2` and `google-auth` intelligently handle silent token refreshes natively. + +### 3. Graceful Configurable Rate Limiting +- **In-Memory Debouncing:** Implemented an internal memory state (`_LAST_CHECKED`) inside `cache_utils` that forcefully prevents checking the Drive API `modifiedTime` for a specific file if we already explicitly checked it within the last 5 minutes.
This prevents flooding the Google Drive API while clicking wildly around the app GUI. +- **Semantic Mappings:** Created a `CACHE_SHEET_MAP` that maps friendly internal cache keys (e.g. `attendance_regular`) back to their raw 44-character Google Sheet IDs. + +### 4. HTTP / Socket Timeout Safety Fix +- **The Bug:** Originally, `socket.setdefaulttimeout(10)` was used to prevent Google Drive metadata checks from locking up the worker pool. However, this brutally mutated the underlying Werkzeug/Flask default sockets globally. If fetching thousands of lines from Google *Sheets* (the payload logic) took longer than 10 seconds, Flask would just kill the request with a random `TimeoutError('timed out')`. +- **The Fix:** Removed the global mutation. Instantiated a targeted, isolated `httplib2.Http(timeout=10)` injected *specifically* into only the Google Drive API build. The rest of the app can now download massive files without randomly timing out. + +### 5. Developer Experience (DX) Enhancements +- **Logging Line Origins:** Enriched the console logging format strings (`logging.basicConfig`) to output `[%(funcName)s]` and `%(filename)s:%(lineno)d` to easily trace exactly which file and function is executing on complex stack traces. +- **Improved VS Code Local Debugging:** + - Integrated `debugpy` launch profiles in `.vscode/launch.json` for "Python Debugger: Flask" (Launching) and "Python Debugger: Attach" (Connecting). + - Added a standard `make web-debug` target inside the Makefile (running `FLASK_DEBUG=1 $(PYTHON) app.py`) so the dashboard can be started locally in Flask debug mode. From 251d7ba6b5debe56eba6639cb274de157eb1e627 Mon Sep 17 00:00:00 2001 From: Jan Novak Date: Wed, 11 Mar 2026 11:01:41 +0100 Subject: [PATCH 3/4] fix: properly debounce Drive API metadata checks in cache Remove the file mtime check from the API debounce tier in get_sheet_modified_time().
Previously, the debounce was defeated when CACHE_TTL_SECONDS differed from CACHE_API_CHECK_TTL_SECONDS because the file age check would fail even though the API was checked recently. Also fix cache key mappings (attendance_juniors sheet ID, payments_transactions rename) and add tmp/ to .gitignore. Co-Authored-By: Claude Opus 4.6 --- .gitignore | 3 +++ app.py | 2 +- scripts/cache_utils.py | 23 ++++++++++------------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index b73a1f6..a3dc557 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ # python cache **/*.pyc .secret + +# local tmp folder +tmp/ diff --git a/app.py b/app.py index f7915b0..d27ff2a 100644 --- a/app.py +++ b/app.py @@ -19,7 +19,7 @@ sys.path.append(str(scripts_dir)) from attendance import get_members_with_fees, get_junior_members_with_fees, SHEET_ID as ATTENDANCE_SHEET_ID, JUNIOR_SHEET_GID, ADULT_MERGED_MONTHS, JUNIOR_MERGED_MONTHS from match_payments import reconcile, fetch_sheet_data, fetch_exceptions, normalize, DEFAULT_SPREADSHEET_ID as PAYMENTS_SHEET_ID -from cache_utils import get_sheet_modified_time, read_cache, write_cache +from cache_utils import get_sheet_modified_time, read_cache, write_cache, _LAST_CHECKED def get_cached_data(cache_key, sheet_id, fetch_func, *args, **kwargs): mod_time = get_sheet_modified_time(cache_key) diff --git a/scripts/cache_utils.py b/scripts/cache_utils.py index 2fa1bec..cae3262 100644 --- a/scripts/cache_utils.py +++ b/scripts/cache_utils.py @@ -13,15 +13,15 @@ logger = logging.getLogger(__name__) CACHE_DIR = Path(__file__).parent.parent / "tmp" CREDS_PATH = Path(__file__).parent.parent / ".secret" / "fuj-management-bot-credentials.json" DRIVE_TIMEOUT = 10 # seconds -CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 1800)) # 30 min default for max cache age +CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 300)) # 5 min default for max cache age CACHE_API_CHECK_TTL_SECONDS = 
int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default # Known mappings mapping "cache name" to Google Sheet ID CACHE_SHEET_MAP = { "attendance_regular": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA", - "attendance_juniors": "1wXm4gB0rW_LCHgLhCqg0Rk-pGkP5xKIf14dO3D3Z_g4", + "attendance_juniors": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA", "exceptions_dict": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y", - "transactions_ledger": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y" + "payments_transactions": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y" } # Global state to track last Drive API check time per sheet @@ -72,18 +72,15 @@ def get_sheet_modified_time(cache_key: str) -> str | None: last_check = _LAST_CHECKED.get(sheet_id, 0) if CACHE_API_CHECK_TTL_SECONDS > 0 and (now - last_check) < CACHE_API_CHECK_TTL_SECONDS: - # We checked recently. Do we have a valid cache file? + # We checked recently. Return cached modifiedTime if cache file exists. if cache_file.exists(): try: - # Still respect the older, broader CACHE_TTL_SECONDS - file_mtime = os.path.getmtime(cache_file) - if CACHE_TTL_SECONDS <= 0 or (now - file_mtime) < CACHE_TTL_SECONDS: - with open(cache_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - cached_time = cache_data.get("modifiedTime") - if cached_time: - logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_API_CHECK_TTL_SECONDS}s API check TTL") - return cached_time + with open(cache_file, "r", encoding="utf-8") as f: + cache_data = json.load(f) + cached_time = cache_data.get("modifiedTime") + if cached_time: + logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_API_CHECK_TTL_SECONDS}s API check TTL") + return cached_time except Exception as e: logger.warning(f"Error reading existing cache during API skip for {sheet_id}: {e}") From 7170cd4d275e597905ff12ad8dbd8b654213884f Mon Sep 17 00:00:00 2001 From: Jan Novak Date: Wed, 11 Mar 2026 11:10:16 +0100 Subject: [PATCH 4/4] refactor: 
unify get_cached_exceptions into get_cached_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add optional serialize/deserialize hooks to get_cached_data() so it can handle the exceptions dict (tuple keys → JSON-safe lists) without needing a separate function. Co-Authored-By: Claude Opus 4.6 --- app.py | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/app.py b/app.py index d27ff2a..90ab9c4 100644 --- a/app.py +++ b/app.py @@ -21,27 +21,15 @@ from attendance import get_members_with_fees, get_junior_members_with_fees, SHEE from match_payments import reconcile, fetch_sheet_data, fetch_exceptions, normalize, DEFAULT_SPREADSHEET_ID as PAYMENTS_SHEET_ID from cache_utils import get_sheet_modified_time, read_cache, write_cache, _LAST_CHECKED -def get_cached_data(cache_key, sheet_id, fetch_func, *args, **kwargs): +def get_cached_data(cache_key, sheet_id, fetch_func, *args, serialize=None, deserialize=None, **kwargs): mod_time = get_sheet_modified_time(cache_key) if mod_time: cached = read_cache(cache_key, mod_time) if cached is not None: - return cached + return deserialize(cached) if deserialize else cached data = fetch_func(*args, **kwargs) if mod_time: - write_cache(cache_key, mod_time, data) - return data - -def get_cached_exceptions(sheet_id, creds_path): - cache_key = "exceptions_dict" - mod_time = get_sheet_modified_time(cache_key) - if mod_time: - cached = read_cache(cache_key, mod_time) - if cached is not None: - return {tuple(k): v for k, v in cached} - data = fetch_exceptions(sheet_id, creds_path) - if mod_time: - write_cache(cache_key, mod_time, [[list(k), v] for k, v in data.items()]) + write_cache(cache_key, mod_time, serialize(data) if serialize else data) return data def get_month_labels(sorted_months, merged_months): @@ -123,7 +111,12 @@ def fees(): # Get exceptions for formatting credentials_path = ".secret/fuj-management-bot-credentials.json" - 
exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_data( + "exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions, + PAYMENTS_SHEET_ID, credentials_path, + serialize=lambda d: [[list(k), v] for k, v in d.items()], + deserialize=lambda c: {tuple(k): v for k, v in c}, + ) record_step("fetch_exceptions") formatted_results = [] @@ -181,7 +174,12 @@ def fees_juniors(): # Get exceptions for formatting (reusing payments sheet) credentials_path = ".secret/fuj-management-bot-credentials.json" - exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_data( + "exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions, + PAYMENTS_SHEET_ID, credentials_path, + serialize=lambda d: [[list(k), v] for k, v in d.items()], + deserialize=lambda c: {tuple(k): v for k, v in c}, + ) record_step("fetch_exceptions") formatted_results = [] @@ -253,7 +251,12 @@ def reconcile_view(): transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_payments") - exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_data( + "exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions, + PAYMENTS_SHEET_ID, credentials_path, + serialize=lambda d: [[list(k), v] for k, v in d.items()], + deserialize=lambda c: {tuple(k): v for k, v in c}, + ) record_step("fetch_exceptions") result = reconcile(members, sorted_months, transactions, exceptions) record_step("reconcile") @@ -346,7 +349,12 @@ def reconcile_juniors_view(): transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path) record_step("fetch_payments") - exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path) + exceptions = get_cached_data( + "exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions, + PAYMENTS_SHEET_ID, credentials_path, + serialize=lambda d: 
[[list(k), v] for k, v in d.items()], + deserialize=lambda c: {tuple(k): v for k, v in c}, + ) record_step("fetch_exceptions") # Adapt junior tuple format (name, tier, {month: (fee, total_count, adult_count, junior_count)})