Compare commits
5 Commits
c8c145486f
...
0d0c2af778
| Author | SHA1 | Date | |
|---|---|---|---|
| 0d0c2af778 | |||
| 7170cd4d27 | |||
| 251d7ba6b5 | |||
| 76cdcba424 | |||
| 8662cb4592 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,3 +1,6 @@
|
|||||||
# python cache
|
# python cache
|
||||||
**/*.pyc
|
**/*.pyc
|
||||||
.secret
|
.secret
|
||||||
|
|
||||||
|
# local tmp folder
|
||||||
|
tmp/
|
||||||
|
|||||||
33
.vscode/launch.json
vendored
Normal file
33
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Flask",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"module": "flask",
|
||||||
|
"python": "${workspaceFolder}/.venv/bin/python",
|
||||||
|
"env": {
|
||||||
|
"FLASK_APP": "app.py",
|
||||||
|
"FLASK_DEBUG": "1"
|
||||||
|
},
|
||||||
|
"args": [
|
||||||
|
"run",
|
||||||
|
"--no-debugger",
|
||||||
|
"--no-reload",
|
||||||
|
"--host", "0.0.0.0",
|
||||||
|
"--port", "5001"
|
||||||
|
],
|
||||||
|
"jinja": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Attach",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "attach",
|
||||||
|
"connect": {
|
||||||
|
"host": "localhost",
|
||||||
|
"port": 5678
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
10
Makefile
10
Makefile
@@ -1,4 +1,4 @@
|
|||||||
.PHONY: help fees match web image run sync sync-2026 test test-v docs
|
.PHONY: help fees match web web-debug image run sync sync-2026 test test-v docs
|
||||||
|
|
||||||
export PYTHONPATH := scripts:$(PYTHONPATH)
|
export PYTHONPATH := scripts:$(PYTHONPATH)
|
||||||
VENV := .venv
|
VENV := .venv
|
||||||
@@ -15,8 +15,9 @@ help:
|
|||||||
@echo "Available targets:"
|
@echo "Available targets:"
|
||||||
@echo " make fees - Calculate monthly fees from the attendance sheet"
|
@echo " make fees - Calculate monthly fees from the attendance sheet"
|
||||||
@echo " make match - Match Fio bank payments against expected attendance fees"
|
@echo " make match - Match Fio bank payments against expected attendance fees"
|
||||||
@echo " make web - Start a dynamic web dashboard locally"
|
@echo " make web - Start a dynamic web dashboard locally"
|
||||||
@echo " make image - Build an OCI container image"
|
@echo " make web-debug - Start a dynamic web dashboard locally in debug mode"
|
||||||
|
@echo " make image - Build an OCI container image"
|
||||||
@echo " make run - Run the built Docker image locally"
|
@echo " make run - Run the built Docker image locally"
|
||||||
@echo " make sync - Sync Fio transactions to Google Sheets"
|
@echo " make sync - Sync Fio transactions to Google Sheets"
|
||||||
@echo " make sync-2025 - Sync Fio transactions for Q4 2025 (Oct-Dec)"
|
@echo " make sync-2025 - Sync Fio transactions for Q4 2025 (Oct-Dec)"
|
||||||
@@ -40,6 +41,9 @@ match: $(PYTHON)
|
|||||||
web: $(PYTHON)
|
web: $(PYTHON)
|
||||||
$(PYTHON) app.py
|
$(PYTHON) app.py
|
||||||
|
|
||||||
|
web-debug: $(PYTHON)
|
||||||
|
FLASK_DEBUG=1 $(PYTHON) app.py
|
||||||
|
|
||||||
image:
|
image:
|
||||||
docker build -t fuj-management:latest -f build/Dockerfile .
|
docker build -t fuj-management:latest -f build/Dockerfile .
|
||||||
|
|
||||||
|
|||||||
71
app.py
71
app.py
@@ -6,14 +6,31 @@ import time
|
|||||||
import os
|
import os
|
||||||
import io
|
import io
|
||||||
import qrcode
|
import qrcode
|
||||||
|
import logging
|
||||||
from flask import Flask, render_template, g, send_file, request
|
from flask import Flask, render_template, g, send_file, request
|
||||||
|
|
||||||
|
# Configure logging, allowing override via LOG_LEVEL environment variable
|
||||||
|
log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
|
||||||
|
logging.basicConfig(level=getattr(logging, log_level, logging.INFO), format='%(asctime)s - %(name)s:%(filename)s:%(lineno)d [%(funcName)s] - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
# Add scripts directory to path to allow importing from it
|
# Add scripts directory to path to allow importing from it
|
||||||
scripts_dir = Path(__file__).parent / "scripts"
|
scripts_dir = Path(__file__).parent / "scripts"
|
||||||
sys.path.append(str(scripts_dir))
|
sys.path.append(str(scripts_dir))
|
||||||
|
|
||||||
from attendance import get_members_with_fees, get_junior_members_with_fees, SHEET_ID as ATTENDANCE_SHEET_ID, JUNIOR_SHEET_GID, ADULT_MERGED_MONTHS, JUNIOR_MERGED_MONTHS
|
from attendance import get_members_with_fees, get_junior_members_with_fees, SHEET_ID as ATTENDANCE_SHEET_ID, JUNIOR_SHEET_GID, ADULT_MERGED_MONTHS, JUNIOR_MERGED_MONTHS
|
||||||
from match_payments import reconcile, fetch_sheet_data, fetch_exceptions, normalize, DEFAULT_SPREADSHEET_ID as PAYMENTS_SHEET_ID
|
from match_payments import reconcile, fetch_sheet_data, fetch_exceptions, normalize, DEFAULT_SPREADSHEET_ID as PAYMENTS_SHEET_ID
|
||||||
|
from cache_utils import get_sheet_modified_time, read_cache, write_cache, _LAST_CHECKED
|
||||||
|
|
||||||
|
def get_cached_data(cache_key, sheet_id, fetch_func, *args, serialize=None, deserialize=None, **kwargs):
    """Return the data for ``cache_key``, preferring the on-disk cache.

    The cache entry is validated against the timestamp returned by
    ``get_sheet_modified_time(cache_key)``.  When that timestamp is falsy the
    cache is bypassed completely: nothing is read and nothing is written.

    Args:
        cache_key: Name under which the entry is cached.
        sheet_id: Accepted for call-site symmetry; not used by this function.
        fetch_func: Callable invoked as ``fetch_func(*args, **kwargs)`` on a
            cache miss.
        serialize: Optional callable applied to freshly fetched data before it
            is handed to ``write_cache``.
        deserialize: Optional callable applied to a cached payload before it is
            returned.

    Returns:
        The (optionally deserialized) cached payload on a hit, otherwise the
        raw result of ``fetch_func``.
    """
    stamp = get_sheet_modified_time(cache_key)
    if not stamp:
        # No timestamp to validate against: fetch without touching the cache.
        return fetch_func(*args, **kwargs)

    hit = read_cache(cache_key, stamp)
    if hit is not None:
        if deserialize:
            return deserialize(hit)
        return hit

    fresh = fetch_func(*args, **kwargs)
    payload = serialize(fresh) if serialize else fresh
    write_cache(cache_key, stamp, payload)
    # The caller gets the un-serialized object, exactly as fetched.
    return fresh
|
||||||
|
|
||||||
def get_month_labels(sorted_months, merged_months):
|
def get_month_labels(sorted_months, merged_months):
|
||||||
labels = {}
|
labels = {}
|
||||||
@@ -78,10 +95,11 @@ def fees():
|
|||||||
attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit"
|
attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit"
|
||||||
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
||||||
|
|
||||||
members, sorted_months = get_members_with_fees()
|
members_data = get_cached_data("attendance_regular", ATTENDANCE_SHEET_ID, get_members_with_fees)
|
||||||
record_step("fetch_members")
|
record_step("fetch_members")
|
||||||
if not members:
|
if not members_data:
|
||||||
return "No data."
|
return "No data."
|
||||||
|
members, sorted_months = members_data
|
||||||
|
|
||||||
# Filter to adults only for display
|
# Filter to adults only for display
|
||||||
results = [(name, fees) for name, tier, fees in members if tier == "A"]
|
results = [(name, fees) for name, tier, fees in members if tier == "A"]
|
||||||
@@ -93,7 +111,12 @@ def fees():
|
|||||||
|
|
||||||
# Get exceptions for formatting
|
# Get exceptions for formatting
|
||||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
exceptions = get_cached_data(
|
||||||
|
"exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions,
|
||||||
|
PAYMENTS_SHEET_ID, credentials_path,
|
||||||
|
serialize=lambda d: [[list(k), v] for k, v in d.items()],
|
||||||
|
deserialize=lambda c: {tuple(k): v for k, v in c},
|
||||||
|
)
|
||||||
record_step("fetch_exceptions")
|
record_step("fetch_exceptions")
|
||||||
|
|
||||||
formatted_results = []
|
formatted_results = []
|
||||||
@@ -135,10 +158,11 @@ def fees_juniors():
|
|||||||
attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit#gid={JUNIOR_SHEET_GID}"
|
attendance_url = f"https://docs.google.com/spreadsheets/d/{ATTENDANCE_SHEET_ID}/edit#gid={JUNIOR_SHEET_GID}"
|
||||||
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
||||||
|
|
||||||
members, sorted_months = get_junior_members_with_fees()
|
members_data = get_cached_data("attendance_juniors", ATTENDANCE_SHEET_ID, get_junior_members_with_fees)
|
||||||
record_step("fetch_junior_members")
|
record_step("fetch_junior_members")
|
||||||
if not members:
|
if not members_data:
|
||||||
return "No data."
|
return "No data."
|
||||||
|
members, sorted_months = members_data
|
||||||
|
|
||||||
# Sort members by name
|
# Sort members by name
|
||||||
results = sorted([(name, fees) for name, tier, fees in members], key=lambda x: x[0])
|
results = sorted([(name, fees) for name, tier, fees in members], key=lambda x: x[0])
|
||||||
@@ -150,7 +174,12 @@ def fees_juniors():
|
|||||||
|
|
||||||
# Get exceptions for formatting (reusing payments sheet)
|
# Get exceptions for formatting (reusing payments sheet)
|
||||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
exceptions = get_cached_data(
|
||||||
|
"exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions,
|
||||||
|
PAYMENTS_SHEET_ID, credentials_path,
|
||||||
|
serialize=lambda d: [[list(k), v] for k, v in d.items()],
|
||||||
|
deserialize=lambda c: {tuple(k): v for k, v in c},
|
||||||
|
)
|
||||||
record_step("fetch_exceptions")
|
record_step("fetch_exceptions")
|
||||||
|
|
||||||
formatted_results = []
|
formatted_results = []
|
||||||
@@ -214,14 +243,20 @@ def reconcile_view():
|
|||||||
# Use hardcoded credentials path for now, consistent with other scripts
|
# Use hardcoded credentials path for now, consistent with other scripts
|
||||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||||
|
|
||||||
members, sorted_months = get_members_with_fees()
|
members_data = get_cached_data("attendance_regular", ATTENDANCE_SHEET_ID, get_members_with_fees)
|
||||||
record_step("fetch_members")
|
record_step("fetch_members")
|
||||||
if not members:
|
if not members_data:
|
||||||
return "No data."
|
return "No data."
|
||||||
|
members, sorted_months = members_data
|
||||||
|
|
||||||
transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path)
|
transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path)
|
||||||
record_step("fetch_payments")
|
record_step("fetch_payments")
|
||||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
exceptions = get_cached_data(
|
||||||
|
"exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions,
|
||||||
|
PAYMENTS_SHEET_ID, credentials_path,
|
||||||
|
serialize=lambda d: [[list(k), v] for k, v in d.items()],
|
||||||
|
deserialize=lambda c: {tuple(k): v for k, v in c},
|
||||||
|
)
|
||||||
record_step("fetch_exceptions")
|
record_step("fetch_exceptions")
|
||||||
result = reconcile(members, sorted_months, transactions, exceptions)
|
result = reconcile(members, sorted_months, transactions, exceptions)
|
||||||
record_step("reconcile")
|
record_step("reconcile")
|
||||||
@@ -306,14 +341,20 @@ def reconcile_juniors_view():
|
|||||||
|
|
||||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||||
|
|
||||||
junior_members, sorted_months = get_junior_members_with_fees()
|
junior_members_data = get_cached_data("attendance_juniors", ATTENDANCE_SHEET_ID, get_junior_members_with_fees)
|
||||||
record_step("fetch_junior_members")
|
record_step("fetch_junior_members")
|
||||||
if not junior_members:
|
if not junior_members_data:
|
||||||
return "No data."
|
return "No data."
|
||||||
|
junior_members, sorted_months = junior_members_data
|
||||||
|
|
||||||
transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path)
|
transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path)
|
||||||
record_step("fetch_payments")
|
record_step("fetch_payments")
|
||||||
exceptions = fetch_exceptions(PAYMENTS_SHEET_ID, credentials_path)
|
exceptions = get_cached_data(
|
||||||
|
"exceptions_dict", PAYMENTS_SHEET_ID, fetch_exceptions,
|
||||||
|
PAYMENTS_SHEET_ID, credentials_path,
|
||||||
|
serialize=lambda d: [[list(k), v] for k, v in d.items()],
|
||||||
|
deserialize=lambda c: {tuple(k): v for k, v in c},
|
||||||
|
)
|
||||||
record_step("fetch_exceptions")
|
record_step("fetch_exceptions")
|
||||||
|
|
||||||
# Adapt junior tuple format (name, tier, {month: (fee, total_count, adult_count, junior_count)})
|
# Adapt junior tuple format (name, tier, {month: (fee, total_count, adult_count, junior_count)})
|
||||||
@@ -414,7 +455,7 @@ def payments():
|
|||||||
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
payments_url = f"https://docs.google.com/spreadsheets/d/{PAYMENTS_SHEET_ID}/edit"
|
||||||
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
credentials_path = ".secret/fuj-management-bot-credentials.json"
|
||||||
|
|
||||||
transactions = fetch_sheet_data(PAYMENTS_SHEET_ID, credentials_path)
|
transactions = get_cached_data("payments_transactions", PAYMENTS_SHEET_ID, fetch_sheet_data, PAYMENTS_SHEET_ID, credentials_path)
|
||||||
record_step("fetch_payments")
|
record_step("fetch_payments")
|
||||||
|
|
||||||
# Group transactions by person
|
# Group transactions by person
|
||||||
|
|||||||
29
prompts/outcomes/2026-03-10-cache-data-from-google-sheets.md
Normal file
29
prompts/outcomes/2026-03-10-cache-data-from-google-sheets.md
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# Google Sheets Data Caching Implementation
|
||||||
|
|
||||||
|
**Date:** 2026-03-11
|
||||||
|
**Objective:** Optimize Flask application performance by heavily caching expensive Google Sheets data processing, avoiding redundant HTTP roundtrips to Google APIs, and ensuring rate limits are not exhausted during simple web app reloads.
|
||||||
|
|
||||||
|
## Implemented Features
|
||||||
|
|
||||||
|
### 1. File-Based JSON Caching (`cache_utils.py`)
|
||||||
|
- **Mechanism:** Implemented a new generic caching system that saves API responses and heavily calculated datasets as `.json` files in the project-local, git-ignored `tmp/` directory.
|
||||||
|
- **Drive Metadata Checks:** The cache is validated by asking the Google Drive API (`drive.files().get`) for the remote `modifiedTime` of the target Sheet.
|
||||||
|
- **Cache Hit logic:** If the cached version on disk matches the remote `modifiedTime`, the application skips downloading the full CSV payload and computing tuples—instead serving the instant static cache via `json.load`.
|
||||||
|
|
||||||
|
### 2. Global API Auth Object Reuse
|
||||||
|
- **The Problem:** The `_get_drive_service()` and `get_sheets_service()` implementations were completely rebuilding `googleapiclient.discovery` objects for *every single file check*—re-seeking and exchanging Google Service Account tokens constantly.
|
||||||
|
- **The Fix:** Service objects (`_DRIVE_SERVICE`, `_SHEETS_SERVICE`) are now globally cached in application memory. The server authenticates exactly *once* when it wakes up, dramatically saving milliseconds and network resources across every web request. The underlying `httplib2` and `google-auth` intelligently handle silent token refreshes natively.
|
||||||
|
|
||||||
|
### 3. Graceful Configurable Rate Limiting
|
||||||
|
- **In-Memory Debouncing:** Implemented an internal memory state (`_LAST_CHECKED`) inside `cache_utils` that forcefully prevents checking the Drive API `modifiedTime` for a specific file if we already explicitly checked it within the last 5 minutes. This prevents flooding the Google Drive API while clicking wildly around the app GUI.
|
||||||
|
- **Semantic Mappings:** Created a `CACHE_SHEET_MAP` that maps friendly internal cache keys (e.g. `attendance_regular`) back to their raw 44-character Google Sheet IDs.
|
||||||
|
|
||||||
|
### 4. HTTP / Socket Timeout Safety Fix
|
||||||
|
- **The Bug:** Originally, `socket.setdefaulttimeout(10)` was used to prevent Google Drive metadata checks from locking up the worker pool. However, this brutally mutated the underlying Werkzeug/Flask default sockets globally. If fetching thousands of lines from Google *Sheets* (the payload logic) took longer than 10 seconds, Flask would just kill the request with a random `TimeoutError('timed out')`.
|
||||||
|
- **The Fix:** Removed the global mutation. Instantiated a targeted, isolated `httplib2.Http(timeout=10)` injected *specifically* into only the Google Drive API build. The rest of the app can now download massive files without randomly timing out.
|
||||||
|
|
||||||
|
### 5. Developer Experience (DX) Enhancements
|
||||||
|
- **Logging Line Origins:** Enriched the console logging format string (`logging.basicConfig`) to output `[%(funcName)s]` and `%(filename)s:%(lineno)d`, making it easy to trace exactly which file and function is executing in complex stack traces.
|
||||||
|
- **Improved VS Code Local Debugging:**
|
||||||
|
- Integrated `debugpy` launch profiles in `.vscode/launch.json` for "Python Debugger: Flask" (Launching) and "Python Debugger: Attach" (Connecting).
|
||||||
|
- Implemented a standard `make web-attach` target inside the Makefile via `uv run python -m debugpy --listen ...` to allow the background web app to automatically halt and wait for external debuggers before bootstrapping caching layers.
|
||||||
169
scripts/cache_utils.py
Normal file
169
scripts/cache_utils.py
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from google.oauth2 import service_account
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Constants
|
||||||
|
CACHE_DIR = Path(__file__).parent.parent / "tmp"
|
||||||
|
CREDS_PATH = Path(__file__).parent.parent / ".secret" / "fuj-management-bot-credentials.json"
|
||||||
|
DRIVE_TIMEOUT = 10 # seconds
|
||||||
|
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 300)) # 5 min default for max cache age
|
||||||
|
CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default
|
||||||
|
|
||||||
|
# Known mappings from "cache name" to Google Sheet ID
|
||||||
|
CACHE_SHEET_MAP = {
|
||||||
|
"attendance_regular": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
|
||||||
|
"attendance_juniors": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
|
||||||
|
"exceptions_dict": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y",
|
||||||
|
"payments_transactions": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Global state to track last Drive API check time per sheet
|
||||||
|
_LAST_CHECKED = {}
|
||||||
|
_DRIVE_SERVICE = None
|
||||||
|
|
||||||
|
def _get_drive_service():
    """Return the process-wide Drive v3 client, building it on first use.

    The client is stored in the module global ``_DRIVE_SERVICE`` so later
    calls reuse it.  Returns ``None`` (after logging) when the credentials
    file is missing or when building the client fails for any reason.
    """
    global _DRIVE_SERVICE

    if _DRIVE_SERVICE is None:
        if not CREDS_PATH.exists():
            logger.warning(f"Credentials not found at {CREDS_PATH}. Cannot check Google Drive API.")
            return None

        try:
            credentials = service_account.Credentials.from_service_account_file(
                str(CREDS_PATH),
                scopes=["https://www.googleapis.com/auth/drive.readonly"],
            )

            # The timeout is set on a dedicated httplib2 transport rather than
            # through the global socket default, so only this client is affected.
            import httplib2
            import google_auth_httplib2

            authorized_http = google_auth_httplib2.AuthorizedHttp(
                credentials,
                http=httplib2.Http(timeout=DRIVE_TIMEOUT),
            )

            _DRIVE_SERVICE = build("drive", "v3", http=authorized_http, cache_discovery=False)
        except Exception as e:
            logger.error(f"Failed to build Drive API service: {e}")
            return None

    return _DRIVE_SERVICE
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
def get_sheet_modified_time(cache_key: str) -> str | None:
|
||||||
|
"""Gets the modifiedTime from Google Drive API for a given cache_key.
|
||||||
|
Returns the ISO timestamp string if successful.
|
||||||
|
If the Drive API fails (e.g., lack of permissions for public sheets),
|
||||||
|
it generates a virtual time bucket string to provide a 5-minute TTL cache.
|
||||||
|
"""
|
||||||
|
sheet_id = CACHE_SHEET_MAP.get(cache_key, cache_key)
|
||||||
|
|
||||||
|
cache_file = CACHE_DIR / f"{cache_key}_cache.json"
|
||||||
|
|
||||||
|
# 1. Check if we should skip the Drive API check entirely (global memory TTL)
|
||||||
|
now = time.time()
|
||||||
|
last_check = _LAST_CHECKED.get(sheet_id, 0)
|
||||||
|
|
||||||
|
if CACHE_API_CHECK_TTL_SECONDS > 0 and (now - last_check) < CACHE_API_CHECK_TTL_SECONDS:
|
||||||
|
# We checked recently. Return cached modifiedTime if cache file exists.
|
||||||
|
if cache_file.exists():
|
||||||
|
try:
|
||||||
|
with open(cache_file, "r", encoding="utf-8") as f:
|
||||||
|
cache_data = json.load(f)
|
||||||
|
cached_time = cache_data.get("modifiedTime")
|
||||||
|
if cached_time:
|
||||||
|
logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_API_CHECK_TTL_SECONDS}s API check TTL")
|
||||||
|
return cached_time
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error reading existing cache during API skip for {sheet_id}: {e}")
|
||||||
|
|
||||||
|
# 2. Check if the cache file is simply too new (legacy check)
|
||||||
|
if CACHE_TTL_SECONDS > 0 and cache_file.exists():
|
||||||
|
try:
|
||||||
|
file_mtime = os.path.getmtime(cache_file)
|
||||||
|
if time.time() - file_mtime < CACHE_TTL_SECONDS:
|
||||||
|
with open(cache_file, "r", encoding="utf-8") as f:
|
||||||
|
cache_data = json.load(f)
|
||||||
|
cached_time = cache_data.get("modifiedTime")
|
||||||
|
if cached_time:
|
||||||
|
logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_TTL_SECONDS}s max CACHE_TTL")
|
||||||
|
# We consider this a valid check, update the global state
|
||||||
|
_LAST_CHECKED[sheet_id] = now
|
||||||
|
return cached_time
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error checking cache TTL for {sheet_id}: {e}")
|
||||||
|
|
||||||
|
def _fallback_ttl():
|
||||||
|
bucket = int(time.time() // 300)
|
||||||
|
return f"ttl-5m-{bucket}"
|
||||||
|
|
||||||
|
logger.info(f"Checking Drive API for {sheet_id}")
|
||||||
|
drive_service = _get_drive_service()
|
||||||
|
if not drive_service:
|
||||||
|
return _fallback_ttl()
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_meta = drive_service.files().get(fileId=sheet_id, fields="modifiedTime", supportsAllDrives=True).execute()
|
||||||
|
# Successfully checked API, update the global state
|
||||||
|
_LAST_CHECKED[sheet_id] = time.time()
|
||||||
|
return file_meta.get("modifiedTime")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not get modifiedTime for sheet {sheet_id}: {e}. Falling back to 5-minute TTL.")
|
||||||
|
return _fallback_ttl()
|
||||||
|
|
||||||
|
def read_cache(sheet_id: str, current_modified_time: str) -> list | dict | None:
    """Load the cached payload for ``sheet_id`` if it is still current.

    The cache file is ``CACHE_DIR / "<sheet_id>_cache.json"``.  Its ``data``
    entry is returned only when the file's stored ``modifiedTime`` equals
    ``current_modified_time``.

    Returns:
        The cached ``data`` value on a hit; ``None`` when
        ``current_modified_time`` is falsy, the file does not exist, the
        timestamps differ, or reading/parsing the file fails (logged).
    """
    cache_file = CACHE_DIR / f"{sheet_id}_cache.json"
    if not current_modified_time or not cache_file.exists():
        return None

    try:
        with cache_file.open("r", encoding="utf-8") as handle:
            entry = json.load(handle)

        stored_time = entry.get("modifiedTime")
        if stored_time != current_modified_time:
            logger.info(f"Cache miss for {sheet_id}. Cached: {stored_time}, Current: {current_modified_time}")
            return None

        logger.info(f"Cache hit for {sheet_id} ({current_modified_time})")
        return entry.get("data")
    except Exception as e:
        logger.error(f"Failed to read cache {cache_file}: {e}")
        return None
|
||||||
|
|
||||||
|
def write_cache(sheet_id: str, modified_time: str, data: list | dict) -> None:
|
||||||
|
"""Writes the data to a JSON cache file with the given modified_time."""
|
||||||
|
if not modified_time:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
cache_file = CACHE_DIR / f"{sheet_id}_cache.json"
|
||||||
|
|
||||||
|
cache_data = {
|
||||||
|
"modifiedTime": modified_time,
|
||||||
|
"data": data,
|
||||||
|
"cachedAt": datetime.now().isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
with open(cache_file, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(cache_data, f, ensure_ascii=False)
|
||||||
|
|
||||||
|
logger.info(f"Wrote cache for {sheet_id}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to write cache {sheet_id}: {e}")
|
||||||
@@ -300,8 +300,8 @@ def reconcile(
|
|||||||
norm_name = normalize(name)
|
norm_name = normalize(name)
|
||||||
norm_period = normalize(m)
|
norm_period = normalize(m)
|
||||||
fee_data = member_fees[name].get(m, (0, 0))
|
fee_data = member_fees[name].get(m, (0, 0))
|
||||||
original_expected = fee_data[0] if isinstance(fee_data, tuple) else fee_data
|
original_expected = fee_data[0] if isinstance(fee_data, (tuple, list)) else fee_data
|
||||||
attendance_count = fee_data[1] if isinstance(fee_data, tuple) else 0
|
attendance_count = fee_data[1] if isinstance(fee_data, (tuple, list)) else 0
|
||||||
|
|
||||||
ex_data = exceptions.get((norm_name, norm_period))
|
ex_data = exceptions.get((norm_name, norm_period))
|
||||||
if ex_data is not None:
|
if ex_data is not None:
|
||||||
|
|||||||
@@ -19,8 +19,14 @@ DEFAULT_SPREADSHEET_ID = "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
|
|||||||
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
|
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
|
||||||
TOKEN_FILE = "token.pickle"
|
TOKEN_FILE = "token.pickle"
|
||||||
COLUMN_LABELS = ["Date", "Amount", "manual fix", "Person", "Purpose", "Inferred Amount", "Sender", "VS", "Message", "Bank ID", "Sync ID"]
|
COLUMN_LABELS = ["Date", "Amount", "manual fix", "Person", "Purpose", "Inferred Amount", "Sender", "VS", "Message", "Bank ID", "Sync ID"]
|
||||||
|
_SHEETS_SERVICE = None
|
||||||
|
|
||||||
def get_sheets_service(credentials_path: str):
|
def get_sheets_service(credentials_path: str):
|
||||||
"""Authenticate and return the Google Sheets API service."""
|
"""Authenticate and return the Google Sheets API service."""
|
||||||
|
global _SHEETS_SERVICE
|
||||||
|
if _SHEETS_SERVICE is not None:
|
||||||
|
return _SHEETS_SERVICE
|
||||||
|
|
||||||
if not os.path.exists(credentials_path):
|
if not os.path.exists(credentials_path):
|
||||||
raise FileNotFoundError(f"Credentials file not found: {credentials_path}")
|
raise FileNotFoundError(f"Credentials file not found: {credentials_path}")
|
||||||
|
|
||||||
@@ -50,7 +56,8 @@ def get_sheets_service(credentials_path: str):
|
|||||||
with open(TOKEN_FILE, "wb") as token:
|
with open(TOKEN_FILE, "wb") as token:
|
||||||
pickle.dump(creds, token)
|
pickle.dump(creds, token)
|
||||||
|
|
||||||
return build("sheets", "v4", credentials=creds)
|
_SHEETS_SERVICE = build("sheets", "v4", credentials=creds)
|
||||||
|
return _SHEETS_SERVICE
|
||||||
|
|
||||||
|
|
||||||
def generate_sync_id(tx: dict) -> str:
|
def generate_sync_id(tx: dict) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user