google-documents-read-caching #2
33
.vscode/launch.json
vendored
Normal file
33
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Flask",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "flask",
|
||||
"python": "${workspaceFolder}/.venv/bin/python",
|
||||
"env": {
|
||||
"FLASK_APP": "app.py",
|
||||
"FLASK_DEBUG": "1"
|
||||
},
|
||||
"args": [
|
||||
"run",
|
||||
"--no-debugger",
|
||||
"--no-reload",
|
||||
"--host", "0.0.0.0",
|
||||
"--port", "5001"
|
||||
],
|
||||
"jinja": true
|
||||
},
|
||||
{
|
||||
"name": "Python Debugger: Attach",
|
||||
"type": "debugpy",
|
||||
"request": "attach",
|
||||
"connect": {
|
||||
"host": "localhost",
|
||||
"port": 5678
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
6
Makefile
6
Makefile
@@ -1,4 +1,4 @@
|
||||
.PHONY: help fees match web image run sync sync-2026 test test-v docs
|
||||
.PHONY: help fees match web web-debug image run sync sync-2026 test test-v docs
|
||||
|
||||
export PYTHONPATH := scripts:$(PYTHONPATH)
|
||||
VENV := .venv
|
||||
@@ -16,6 +16,7 @@ help:
|
||||
@echo " make fees - Calculate monthly fees from the attendance sheet"
|
||||
@echo " make match - Match Fio bank payments against expected attendance fees"
|
||||
@echo " make web - Start a dynamic web dashboard locally"
|
||||
@echo " make web-debug - Start a dynamic web dashboard locally in debug mode"
|
||||
@echo " make image - Build an OCI container image"
|
||||
@echo " make run - Run the built Docker image locally"
|
||||
@echo " make sync - Sync Fio transactions to Google Sheets"
|
||||
@@ -40,6 +41,9 @@ match: $(PYTHON)
|
||||
web: $(PYTHON)
|
||||
$(PYTHON) app.py
|
||||
|
||||
web-debug: $(PYTHON)
|
||||
FLASK_DEBUG=1 $(PYTHON) app.py
|
||||
|
||||
image:
|
||||
docker build -t fuj-management:latest -f build/Dockerfile .
|
||||
|
||||
|
||||
63
app.py
63
app.py
@@ -6,14 +6,43 @@ import time
|
||||
import os
|
||||
import io
|
||||
import qrcode
|
||||
import logging
|
||||
from flask import Flask, render_template, g, send_file, request
|
||||
|
||||
# Configure logging, allowing override via LOG_LEVEL environment variable
|
||||
log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
|
||||
logging.basicConfig(level=getattr(logging, log_level, logging.INFO), format='%(asctime)s - %(name)s:%(filename)s:%(lineno)d [%(funcName)s] - %(levelname)s - %(message)s')
|
||||
|
||||
# Add scripts directory to path to allow importing from it
|
||||
scripts_dir = Path(__file__).parent / "scripts"
|
||||
sys.path.append(str(scripts_dir))
|
||||
|
||||
from attendance import get_members_with_fees, get_junior_members_with_fees, SHEET_ID as ATTENDANCE_SHEET_ID, JUNIOR_SHEET_GID, ADULT_MERGED_MONTHS, JUNIOR_MERGED_MONTHS
|
||||
from match_payments import reconcile, fetch_sheet_data, fetch_exceptions, normalize, DEFAULT_SPREADSHEET_ID as PAYMENTS_SHEET_ID
|
||||
from cache_utils import get_sheet_modified_time, read_cache, write_cache
|
||||
|
||||
def get_cached_data(cache_key, sheet_id, fetch_func, *args, **kwargs):
    """Return the data for ``cache_key``, calling ``fetch_func`` only on a miss.

    The version marker from ``get_sheet_modified_time(cache_key)`` decides
    whether the stored copy is still valid. On a miss the result of
    ``fetch_func(*args, **kwargs)`` is returned and, when a marker is
    available, written back to the cache under that marker.

    Args:
        cache_key: Name used for both the version lookup and the cache entry.
        sheet_id: Accepted for the callers' convenience; this function does
            not read it (all lookups are keyed by ``cache_key``).
        fetch_func: Callable that produces the fresh data.
        *args, **kwargs: Forwarded unchanged to ``fetch_func``.

    Returns:
        The cached payload when ``read_cache`` returns something other than
        ``None``, otherwise whatever ``fetch_func`` returned.
    """
    version = get_sheet_modified_time(cache_key)

    if not version:
        # Without a version marker nothing can be validated or stored,
        # so go straight to the source.
        return fetch_func(*args, **kwargs)

    stored = read_cache(cache_key, version)
    if stored is not None:
        return stored

    fresh = fetch_func(*args, **kwargs)
    write_cache(cache_key, version, fresh)
    return fresh
|
||||
|
||||
def get_cached_exceptions(sheet_id, creds_path):
    """Return the exceptions mapping, served from the cache when possible.

    The mapping returned by ``fetch_exceptions`` is stored as a list of
    ``[list(key), value]`` pairs and rebuilt with ``tuple(key)`` on a cache
    hit, so callers always index the result by tuple.

    Args:
        sheet_id: Spreadsheet ID passed through to ``fetch_exceptions``.
        creds_path: Credentials file path passed through to
            ``fetch_exceptions``.

    Returns:
        A dict keyed by tuples: rebuilt from the cache on a hit, or the
        object returned by ``fetch_exceptions`` on a miss.
    """
    cache_key = "exceptions_dict"
    version = get_sheet_modified_time(cache_key)

    if version:
        stored_pairs = read_cache(cache_key, version)
        if stored_pairs is not None:
            # Undo the list-of-pairs encoding used when the entry was written.
            return {tuple(key): value for key, value in stored_pairs}

    exceptions = fetch_exceptions(sheet_id, creds_path)

    if version:
        encoded = [[list(key), value] for key, value in exceptions.items()]
        write_cache(cache_key, version, encoded)

    return exceptions
|
||||
|
||||
def get_month_labels(sorted_months, merged_months):
|
||||
labels = {}
|
||||
@@ -78,10 +107,11 @@ def fees():
|
||||
attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit"
|
||||
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
||||
|
||||
members, sorted_months = get_members_with_fees()
|
||||
members_data = get_cached_data("attendance_regular", ATTENDANCE_SHEET_ID, get_members_with_fees)
|
||||
record_step("fetch_members")
|
||||
if not members:
|
||||
if not members_data:
|
||||
return "No data."
|
||||
members, sorted_months = members_data
|
||||
|
||||
# Filter to adults only for display
|
||||
results = [(name, fees) for name, tier, fees in members if tier == "A"]
|
||||
@@ -93,7 +123,7 @@ def fees():
|
||||
|
||||
# Get exceptions for formatting
|
||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
record_step("fetch_exceptions")
|
||||
|
||||
formatted_results = []
|
||||
@@ -135,10 +165,11 @@ def fees_juniors():
|
||||
attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit#gid={JUNIOR_SHEET_GID}"
|
||||
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
||||
|
||||
members, sorted_months = get_junior_members_with_fees()
|
||||
members_data = get_cached_data("attendance_juniors", ATTENDANCE_SHEET_ID, get_junior_members_with_fees)
|
||||
record_step("fetch_junior_members")
|
||||
if not members:
|
||||
if not members_data:
|
||||
return "No data."
|
||||
members, sorted_months = members_data
|
||||
|
||||
# Sort members by name
|
||||
results = sorted([(name, fees) for name, tier, fees in members], key=lambda x: x[0])
|
||||
@@ -150,7 +181,7 @@ def fees_juniors():
|
||||
|
||||
# Get exceptions for formatting (reusing payments sheet)
|
||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
record_step("fetch_exceptions")
|
||||
|
||||
formatted_results = []
|
||||
@@ -214,14 +245,15 @@ def reconcile_view():
|
||||
# Use hardcoded credentials path for now, consistent with other scripts
|
||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||
|
||||
members, sorted_months = get_members_with_fees()
|
||||
members_data = get_cached_data("attendance_regular", ATTENDANCE_SHEET_ID, get_members_with_fees)
|
||||
record_step("fetch_members")
|
||||
if not members:
|
||||
if not members_data:
|
||||
return "No data."
|
||||
members, sorted_months = members_data
|
||||
|
||||
transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path)
|
||||
transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path)
|
||||
record_step("fetch_payments")
|
||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
record_step("fetch_exceptions")
|
||||
result = reconcile(members, sorted_months, transactions, exceptions)
|
||||
record_step("reconcile")
|
||||
@@ -306,14 +338,15 @@ def reconcile_juniors_view():
|
||||
|
||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||
|
||||
junior_members, sorted_months = get_junior_members_with_fees()
|
||||
junior_members_data = get_cached_data("attendance_juniors", ATTENDANCE_SHEET_ID, get_junior_members_with_fees)
|
||||
record_step("fetch_junior_members")
|
||||
if not junior_members:
|
||||
if not junior_members_data:
|
||||
return "No data."
|
||||
junior_members, sorted_months = junior_members_data
|
||||
|
||||
transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path)
|
||||
transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path)
|
||||
record_step("fetch_payments")
|
||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
exceptions = get_cached_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
||||
record_step("fetch_exceptions")
|
||||
|
||||
# Adapt junior tuple format (name, tier, {month: (fee, total_count, adult_count, junior_count)})
|
||||
@@ -414,7 +447,7 @@ def payments():
|
||||
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||
|
||||
transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path)
|
||||
transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path)
|
||||
record_step("fetch_payments")
|
||||
|
||||
# Group transactions by person
|
||||
|
||||
172
scripts/cache_utils.py
Normal file
172
scripts/cache_utils.py
Normal file
@@ -0,0 +1,172 @@
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from google.oauth2 import service_account
|
||||
from googleapiclient.discovery import build
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Constants
# Directory that holds the "<cache_key>_cache.json" files.
CACHE_DIR = Path(__file__).parent.parent / "tmp"
# Service-account credentials used for the Drive metadata lookups.
CREDS_PATH = Path(__file__).parent.parent / ".secret" / "fuj-management-bot-credentials.json"
DRIVE_TIMEOUT = 10  # seconds
# Maximum age of a cache file before the Drive API is consulted again (30 min default).
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 1800))
# Minimum interval between two Drive API checks for the same sheet (5 min default).
CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300))

# Maps each cache key to the Google Sheet ID whose modification time
# invalidates it. A key missing from this map is used verbatim as the Drive
# file ID by get_sheet_modified_time(), so every cache key must be listed.
CACHE_SHEET_MAP = {
    "attendance_regular": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
    # NOTE(review): app.py addresses the junior sheet as ATTENDANCE_SHEET_ID
    # plus JUNIOR_SHEET_GID, i.e. presumably a tab of the attendance
    # spreadsheet -- confirm that this separate ID is correct.
    "attendance_juniors": "1wXm4gB0rW_LCHgLhCqg0Rk-pGkP5xKIf14dO3D3Z_g4",
    "exceptions_dict": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y",
    "transactions_ledger": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y",
    # app.py caches the payments ledger under this key; without the entry the
    # key itself would be sent to Drive as a file ID and the check would fail.
    "payments_transactions": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y",
}

# Global state: time of the last Drive API check, keyed by sheet ID.
_LAST_CHECKED = {}
# Lazily built Drive API client, shared for the lifetime of the process.
_DRIVE_SERVICE = None
|
||||
|
||||
def _get_drive_service():
    """Return the shared Drive v3 client, building it on first use.

    The client is stored in the module-level ``_DRIVE_SERVICE`` and reused by
    later calls. It is built from the service-account file at ``CREDS_PATH``
    with the read-only Drive scope and an HTTP timeout of ``DRIVE_TIMEOUT``
    seconds.

    Returns:
        The Drive service object, or ``None`` when the credentials file is
        missing or building the client raises.
    """
    global _DRIVE_SERVICE

    if _DRIVE_SERVICE is None:
        if not CREDS_PATH.exists():
            logger.warning(f"Credentials not found at {CREDS_PATH}. Cannot check Google Drive API.")
            return None

        try:
            credentials = service_account.Credentials.from_service_account_file(
                str(CREDS_PATH),
                scopes=["https://www.googleapis.com/auth/drive.readonly"],
            )

            # The timeout is set on this one httplib2 connection rather than
            # through the process-wide socket default.
            import httplib2
            import google_auth_httplib2

            transport = google_auth_httplib2.AuthorizedHttp(
                credentials,
                http=httplib2.Http(timeout=DRIVE_TIMEOUT),
            )

            # NOTE(review): one client is shared by every caller in the
            # process -- confirm this is safe under a threaded server.
            _DRIVE_SERVICE = build("drive", "v3", http=transport, cache_discovery=False)
        except Exception as e:
            logger.error(f"Failed to build Drive API service: {e}")
            return None

    return _DRIVE_SERVICE
|
||||
|
||||
import time
|
||||
|
||||
def get_sheet_modified_time(cache_key: str) -> str | None:
    """Return a version marker for the sheet behind ``cache_key``.

    The marker is normally the ``modifiedTime`` reported by the Drive API.
    Two shortcuts avoid the API call and return the ``modifiedTime`` stored
    in the existing cache file instead:

    1. The sheet was checked less than ``CACHE_API_CHECK_TTL_SECONDS`` ago
       and the cache file is not older than ``CACHE_TTL_SECONDS`` (or that
       limit is disabled with a value <= 0).
    2. The cache file is younger than ``CACHE_TTL_SECONDS``.

    When no Drive client is available or the API call raises, a synthetic
    marker that changes every 300 seconds is returned.

    Args:
        cache_key: Cache name; resolved to a sheet ID through
            ``CACHE_SHEET_MAP`` and used verbatim as the ID when unmapped.

    Returns:
        The marker string, or ``None`` if the Drive response carries no
        ``modifiedTime``.
    """

    def _ttl_bucket():
        # Synthetic marker: constant within a 300-second window.
        window = int(time.time() // 300)
        return f"ttl-5m-{window}"

    sheet_id = CACHE_SHEET_MAP.get(cache_key, cache_key)
    cache_path = CACHE_DIR / f"{cache_key}_cache.json"

    # 1. Skip the Drive API entirely if this sheet was checked recently
    #    (in-memory TTL) and a usable cache file is present.
    started = time.time()
    previous_check = _LAST_CHECKED.get(sheet_id, 0)
    checked_recently = (
        CACHE_API_CHECK_TTL_SECONDS > 0
        and (started - previous_check) < CACHE_API_CHECK_TTL_SECONDS
    )

    if checked_recently and cache_path.exists():
        try:
            # The broader CACHE_TTL_SECONDS limit still applies here.
            written_at = os.path.getmtime(cache_path)
            if CACHE_TTL_SECONDS <= 0 or (started - written_at) < CACHE_TTL_SECONDS:
                with open(cache_path, "r", encoding="utf-8") as fh:
                    payload = json.load(fh)
                stored_marker = payload.get("modifiedTime")
                if stored_marker:
                    logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_API_CHECK_TTL_SECONDS}s API check TTL")
                    return stored_marker
        except Exception as e:
            logger.warning(f"Error reading existing cache during API skip for {sheet_id}: {e}")

    # 2. Legacy check: a cache file younger than CACHE_TTL_SECONDS is trusted
    #    without asking Drive.
    if CACHE_TTL_SECONDS > 0 and cache_path.exists():
        try:
            written_at = os.path.getmtime(cache_path)
            if time.time() - written_at < CACHE_TTL_SECONDS:
                with open(cache_path, "r", encoding="utf-8") as fh:
                    payload = json.load(fh)
                stored_marker = payload.get("modifiedTime")
                if stored_marker:
                    logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_TTL_SECONDS}s max CACHE_TTL")
                    # Counts as a check: remember when it happened.
                    _LAST_CHECKED[sheet_id] = started
                    return stored_marker
        except Exception as e:
            logger.warning(f"Error checking cache TTL for {sheet_id}: {e}")

    # 3. Ask the Drive API.
    logger.info(f"Checking Drive API for {sheet_id}")
    service = _get_drive_service()
    if not service:
        return _ttl_bucket()

    try:
        metadata = service.files().get(fileId=sheet_id, fields="modifiedTime", supportsAllDrives=True).execute()
        # Only a successful API call updates the last-checked time.
        _LAST_CHECKED[sheet_id] = time.time()
        return metadata.get("modifiedTime")
    except Exception as e:
        logger.warning(f"Could not get modifiedTime for sheet {sheet_id}: {e}. Falling back to 5-minute TTL.")
        return _ttl_bucket()
|
||||
|
||||
def read_cache(sheet_id: str, current_modified_time: str) -> list | dict | None:
    """Load the cached payload for ``sheet_id`` if it is still current.

    Args:
        sheet_id: Name of the cache entry; the file read is
            ``CACHE_DIR / "<sheet_id>_cache.json"``.
        current_modified_time: Version marker the stored entry must match.

    Returns:
        The stored ``"data"`` value when the file exists and its
        ``"modifiedTime"`` equals ``current_modified_time``. ``None`` when the
        marker is empty, the file is missing, the markers differ, or reading
        the file raises.
    """
    if not current_modified_time:
        return None

    cache_path = CACHE_DIR / f"{sheet_id}_cache.json"
    if not cache_path.exists():
        return None

    try:
        with open(cache_path, "r", encoding="utf-8") as fh:
            payload = json.load(fh)

        stored_marker = payload.get("modifiedTime")
        if stored_marker != current_modified_time:
            logger.info(f"Cache miss for {sheet_id}. Cached: {stored_marker}, Current: {current_modified_time}")
            return None

        logger.info(f"Cache hit for {sheet_id} ({current_modified_time})")
        return payload.get("data")
    except Exception as e:
        # Unreadable or malformed cache files are treated as a miss.
        logger.error(f"Failed to read cache {cache_path}: {e}")
        return None
|
||||
|
||||
def write_cache(sheet_id: str, modified_time: str, data: list | dict) -> None:
|
||||
"""Writes the data to a JSON cache file with the given modified_time."""
|
||||
if not modified_time:
|
||||
return
|
||||
|
||||
try:
|
||||
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
cache_file = CACHE_DIR / f"{sheet_id}_cache.json"
|
||||
|
||||
cache_data = {
|
||||
"modifiedTime": modified_time,
|
||||
"data": data,
|
||||
"cachedAt": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
with open(cache_file, "w", encoding="utf-8") as f:
|
||||
json.dump(cache_data, f, ensure_ascii=False)
|
||||
|
||||
logger.info(f"Wrote cache for {sheet_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to write cache {sheet_id}: {e}")
|
||||
@@ -300,8 +300,8 @@ def reconcile(
|
||||
norm_name = normalize(name)
|
||||
norm_period = normalize(m)
|
||||
fee_data = member_fees[name].get(m, (0, 0))
|
||||
original_expected = fee_data[0] if isinstance(fee_data, tuple) else fee_data
|
||||
attendance_count = fee_data[1] if isinstance(fee_data, tuple) else 0
|
||||
original_expected = fee_data[0] if isinstance(fee_data, (tuple, list)) else fee_data
|
||||
attendance_count = fee_data[1] if isinstance(fee_data, (tuple, list)) else 0
|
||||
|
||||
ex_data = exceptions.get((norm_name, norm_period))
|
||||
if ex_data is not None:
|
||||
|
||||
@@ -19,8 +19,14 @@ DEFAULT_SPREADSHEET_ID = "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
|
||||
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
|
||||
TOKEN_FILE = "token.pickle"
|
||||
COLUMN_LABELS = ["Date", "Amount", "manual fix", "Person", "Purpose", "Inferred Amount", "Sender", "VS", "Message", "Bank ID", "Sync ID"]
|
||||
_SHEETS_SERVICE = None
|
||||
|
||||
def get_sheets_service(credentials_path: str):
|
||||
"""Authenticate and return the Google Sheets API service."""
|
||||
global _SHEETS_SERVICE
|
||||
if _SHEETS_SERVICE is not None:
|
||||
return _SHEETS_SERVICE
|
||||
|
||||
if not os.path.exists(credentials_path):
|
||||
raise FileNotFoundError(f"Credentials file not found: {credentials_path}")
|
||||
|
||||
@@ -50,7 +56,8 @@ def get_sheets_service(credentials_path: str):
|
||||
with open(TOKEN_FILE, "wb") as token:
|
||||
pickle.dump(creds, token)
|
||||
|
||||
return build("sheets", "v4", credentials=creds)
|
||||
_SHEETS_SERVICE = build("sheets", "v4", credentials=creds)
|
||||
return _SHEETS_SERVICE
|
||||
|
||||
|
||||
def generate_sync_id(tx: dict) -> str:
|
||||
|
||||
Reference in New Issue
Block a user