google-documents-read-caching #2

Merged
kacerr merged 4 commits from google-documents-read-caching into main 2026-03-11 10:13:18 +00:00
3 changed files with 14 additions and 14 deletions
Showing only changes of commit 251d7ba6b5 - Show all commits

3
.gitignore vendored
View File

@@ -1,3 +1,6 @@
# python cache
**/*.pyc
.secret
# local tmp folder
tmp/

2
app.py
View File

@@ -19,7 +19,7 @@ sys.path.append(str(scripts_dir))
from attendance import get_members_with_fees, get_junior_members_with_fees, SHEET_ID as ATTENDANCE_SHEET_ID, JUNIOR_SHEET_GID, ADULT_MERGED_MONTHS, JUNIOR_MERGED_MONTHS
from match_payments import reconcile, fetch_sheet_data, fetch_exceptions, normalize, DEFAULT_SPREADSHEET_ID as PAYMENTS_SHEET_ID
from cache_utils import get_sheet_modified_time, read_cache, write_cache
from cache_utils import get_sheet_modified_time, read_cache, write_cache, _LAST_CHECKED
def get_cached_data(cache_key, sheet_id, fetch_func, *args, **kwargs):
mod_time = get_sheet_modified_time(cache_key)

View File

@@ -13,15 +13,15 @@ logger = logging.getLogger(__name__)
CACHE_DIR = Path(__file__).parent.parent / "tmp"
CREDS_PATH = Path(__file__).parent.parent / ".secret" / "fuj-management-bot-credentials.json"
DRIVE_TIMEOUT = 10 # seconds
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 1800)) # 30 min default for max cache age
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 300)) # 5 min default for max cache age
CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default
# Known mappings from "cache name" to Google Sheet ID
CACHE_SHEET_MAP = {
"attendance_regular": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
"attendance_juniors": "1wXm4gB0rW_LCHgLhCqg0Rk-pGkP5xKIf14dO3D3Z_g4",
"attendance_juniors": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
"exceptions_dict": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y",
"transactions_ledger": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
"payments_transactions": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
}
# Global state to track last Drive API check time per sheet
@@ -72,18 +72,15 @@ def get_sheet_modified_time(cache_key: str) -> str | None:
last_check = _LAST_CHECKED.get(sheet_id, 0)
if CACHE_API_CHECK_TTL_SECONDS > 0 and (now - last_check) < CACHE_API_CHECK_TTL_SECONDS:
# We checked recently. Do we have a valid cache file?
# We checked recently. Return cached modifiedTime if cache file exists.
if cache_file.exists():
try:
# Still respect the older, broader CACHE_TTL_SECONDS
file_mtime = os.path.getmtime(cache_file)
if CACHE_TTL_SECONDS <= 0 or (now - file_mtime) < CACHE_TTL_SECONDS:
with open(cache_file, "r", encoding="utf-8") as f:
cache_data = json.load(f)
cached_time = cache_data.get("modifiedTime")
if cached_time:
logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_API_CHECK_TTL_SECONDS}s API check TTL")
return cached_time
with open(cache_file, "r", encoding="utf-8") as f:
cache_data = json.load(f)
cached_time = cache_data.get("modifiedTime")
if cached_time:
logger.info(f"Skipping Drive API check for {sheet_id} due to {CACHE_API_CHECK_TTL_SECONDS}s API check TTL")
return cached_time
except Exception as e:
logger.warning(f"Error reading existing cache during API skip for {sheet_id}: {e}")