refactor: code quality improvements across the backend
All checks were successful
Deploy to K8s / deploy (push) Successful in 13s
Build and Push / build (push) Successful in 32s

- Remove insecure SSL verification bypass in attendance.py
- Add gunicorn as production WSGI server (Dockerfile + entrypoint)
- Fix silent data loss in reconciliation (log + surface unmatched members)
- Add required column validation in payment sheet parsing
- Add input validation on /qr route (account format, amount bounds, SPD injection)
- Centralize configuration into scripts/config.py
- Extract credentials path to env-configurable constant
- Hide unmatched transactions from reconcile-juniors page
- Fix test mocks to bypass cache layer (all 8 tests now pass reliably)
- Add pytest + pytest-cov dev dependencies
- Fix typo "Inffering" in infer_payments.py
- Update CLAUDE.md to reflect current project state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-11 11:40:32 +01:00
parent 0d0c2af778
commit 033349cafa
13 changed files with 293 additions and 88 deletions

View File

@@ -5,8 +5,8 @@ import io
import urllib.request
from datetime import datetime
SHEET_ID = "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA"
JUNIOR_SHEET_GID = "1213318614"
from config import ATTENDANCE_SHEET_ID as SHEET_ID, JUNIOR_SHEET_GID
EXPORT_URL = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=csv&gid=0"
JUNIOR_EXPORT_URL = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=csv&gid={JUNIOR_SHEET_GID}"
@@ -34,13 +34,8 @@ FIRST_DATE_COL = 3
def fetch_csv(url: str = EXPORT_URL) -> list[list[str]]:
    """Fetch a Google Sheet CSV export and return it as parsed rows.

    Args:
        url: CSV export URL; defaults to the main attendance tab export.

    Returns:
        List of rows, each row a list of cell strings.

    Raises:
        urllib.error.URLError: on network or TLS failure.
    """
    # Use default SSL certificate verification — never disable hostname or
    # cert checks here (a previous ssl.CERT_NONE bypass exposed the sheet
    # download to man-in-the-middle tampering).
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req) as resp:
        text = resp.read().decode("utf-8")
    return list(csv.reader(io.StringIO(text)))

View File

@@ -1,29 +1,17 @@
import json
import os
import socket
import logging
from datetime import datetime
from pathlib import Path
from google.oauth2 import service_account
from googleapiclient.discovery import build
from config import (
CACHE_DIR, CREDENTIALS_PATH as CREDS_PATH, DRIVE_TIMEOUT,
CACHE_TTL_SECONDS, CACHE_API_CHECK_TTL_SECONDS, CACHE_SHEET_MAP,
)
logger = logging.getLogger(__name__)
# Constants
CACHE_DIR = Path(__file__).parent.parent / "tmp"
CREDS_PATH = Path(__file__).parent.parent / ".secret" / "fuj-management-bot-credentials.json"
DRIVE_TIMEOUT = 10 # seconds
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 300)) # 30 min default for max cache age
CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default
# Known mappings mapping "cache name" to Google Sheet ID
CACHE_SHEET_MAP = {
"attendance_regular": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
"attendance_juniors": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
"exceptions_dict": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y",
"payments_transactions": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
}
# Global state to track last Drive API check time per sheet
_LAST_CHECKED = {}
_DRIVE_SERVICE = None
@@ -87,7 +75,7 @@ def get_sheet_modified_time(cache_key: str) -> str | None:
# 2. Check if the cache file is simply too new (legacy check)
if CACHE_TTL_SECONDS > 0 and cache_file.exists():
try:
file_mtime = os.path.getmtime(cache_file)
file_mtime = cache_file.stat().st_mtime
if time.time() - file_mtime < CACHE_TTL_SECONDS:
with open(cache_file, "r", encoding="utf-8") as f:
cache_data = json.load(f)

39
scripts/config.py Normal file
View File

@@ -0,0 +1,39 @@
"""Centralized configuration for FUJ management scripts.
External service IDs, credentials, and tunable parameters.
Domain-specific constants (fees, column indices) stay in their respective modules.
"""
import os
from pathlib import Path
# Paths
PROJECT_ROOT = Path(__file__).parent.parent
CREDENTIALS_PATH = Path(os.environ.get(
"CREDENTIALS_PATH",
str(PROJECT_ROOT / ".secret" / "fuj-management-bot-credentials.json"),
))
# Google Sheets IDs
ATTENDANCE_SHEET_ID = "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA"
PAYMENTS_SHEET_ID = "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
# Attendance sheet tab GIDs
JUNIOR_SHEET_GID = "1213318614"
# Bank
BANK_ACCOUNT = os.environ.get("BANK_ACCOUNT", "CZ8520100000002800359168")
# Cache settings
CACHE_DIR = PROJECT_ROOT / "tmp"
DRIVE_TIMEOUT = 10 # seconds
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 300)) # 5 min default
CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default
# Maps cache keys to their source sheet IDs (used by cache_utils)
CACHE_SHEET_MAP = {
"attendance_regular": ATTENDANCE_SHEET_ID,
"attendance_juniors": ATTENDANCE_SHEET_ID,
"exceptions_dict": PAYMENTS_SHEET_ID,
"payments_transactions": PAYMENTS_SHEET_ID,
}

View File

@@ -102,7 +102,7 @@ def infer_payments(spreadsheet_id: str, credentials_path: str, dry_run: bool = F
member_names = [m[0] for m in members_data]
# 3. Process rows
print("Inffering details for empty rows...")
print("Inferring details for empty rows...")
updates = []
for i, row in enumerate(rows[1:], start=2):

View File

@@ -3,12 +3,15 @@
import argparse
import json
import logging
import os
import re
import urllib.request
from datetime import datetime, timedelta
from html.parser import HTMLParser
logger = logging.getLogger(__name__)
from attendance import get_members_with_fees
from czech_utils import normalize, parse_month_references
from sync_fio_to_sheets import get_sheets_service, DEFAULT_SPREADSHEET_ID
@@ -203,7 +206,7 @@ def fetch_sheet_data(spreadsheet_id: str, credentials_path: str) -> list[dict]:
return -1
idx_date = get_col_index("Date")
idx_amount = get_col_index("Amount")
idx_amount = get_col_index("Amount")
idx_manual = get_col_index("manual fix")
idx_person = get_col_index("Person")
idx_purpose = get_col_index("Purpose")
@@ -212,6 +215,11 @@ def fetch_sheet_data(spreadsheet_id: str, credentials_path: str) -> list[dict]:
idx_message = get_col_index("Message")
idx_bank_id = get_col_index("Bank ID")
required = {"Date": idx_date, "Amount": idx_amount, "Person": idx_person, "Purpose": idx_purpose}
missing = [name for name, idx in required.items() if idx == -1]
if missing:
raise ValueError(f"Required columns missing from payments sheet: {', '.join(missing)}. Found headers: {header}")
transactions = []
for row in rows[1:]:
def get_val(idx):
@@ -381,12 +389,13 @@ def reconcile(
per_allocation = amount / num_allocations if num_allocations > 0 else 0
for member_name, confidence in matched_members:
# If we matched via sheet 'Person' column, name might be partial or have markers
# but usually it's the exact member name from get_members_with_fees.
# Let's ensure it exists in our ledger.
if member_name not in ledger:
# Try matching by base name if it was Jan Novak (Kačerr) etc.
pass
logger.warning(
"Payment matched to unknown member %r (tx: %s, %s) — adding to unmatched",
member_name, tx.get("date", "?"), tx.get("message", "?"),
)
unmatched.append(tx)
continue
for month_key in matched_months:
entry = {
@@ -396,7 +405,7 @@ def reconcile(
"message": tx["message"],
"confidence": confidence,
}
if month_key in ledger.get(member_name, {}):
if month_key in ledger[member_name]:
ledger[member_name][month_key]["paid"] += per_allocation
ledger[member_name][month_key]["transactions"].append(entry)
else:

View File

@@ -14,8 +14,7 @@ from googleapiclient.discovery import build
from fio_utils import fetch_transactions
# Configuration
DEFAULT_SPREADSHEET_ID = "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
from config import PAYMENTS_SHEET_ID as DEFAULT_SPREADSHEET_ID
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
TOKEN_FILE = "token.pickle"
COLUMN_LABELS = ["Date", "Amount", "manual fix", "Person", "Purpose", "Inferred Amount", "Sender", "VS", "Message", "Bank ID", "Sync ID"]