refactor: code quality improvements across the backend
- Remove insecure SSL verification bypass in attendance.py
- Add gunicorn as production WSGI server (Dockerfile + entrypoint)
- Fix silent data loss in reconciliation (log + surface unmatched members)
- Add required column validation in payment sheet parsing
- Add input validation on /qr route (account format, amount bounds, SPD injection)
- Centralize configuration into scripts/config.py
- Extract credentials path to env-configurable constant
- Hide unmatched transactions from reconcile-juniors page
- Fix test mocks to bypass cache layer (all 8 tests now pass reliably)
- Add pytest + pytest-cov dev dependencies
- Fix typo "Inffering" in infer_payments.py
- Update CLAUDE.md to reflect current project state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,8 +5,8 @@ import io
|
||||
import urllib.request
|
||||
from datetime import datetime
|
||||
|
||||
SHEET_ID = "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA"
|
||||
JUNIOR_SHEET_GID = "1213318614"
|
||||
from config import ATTENDANCE_SHEET_ID as SHEET_ID, JUNIOR_SHEET_GID
|
||||
|
||||
EXPORT_URL = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=csv&gid=0"
|
||||
JUNIOR_EXPORT_URL = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=csv&gid={JUNIOR_SHEET_GID}"
|
||||
|
||||
@@ -34,13 +34,8 @@ FIRST_DATE_COL = 3
|
||||
|
||||
def fetch_csv(url: str = EXPORT_URL) -> list[list[str]]:
|
||||
"""Fetch the attendance Google Sheet as parsed CSV rows."""
|
||||
import ssl
|
||||
ctx = ssl.create_default_context()
|
||||
ctx.check_hostname = False
|
||||
ctx.verify_mode = ssl.CERT_NONE
|
||||
|
||||
req = urllib.request.Request(url)
|
||||
with urllib.request.urlopen(req, context=ctx) as resp:
|
||||
with urllib.request.urlopen(req) as resp:
|
||||
text = resp.read().decode("utf-8")
|
||||
reader = csv.reader(io.StringIO(text))
|
||||
return list(reader)
|
||||
|
||||
@@ -1,29 +1,17 @@
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from google.oauth2 import service_account
|
||||
from googleapiclient.discovery import build
|
||||
|
||||
from config import (
|
||||
CACHE_DIR, CREDENTIALS_PATH as CREDS_PATH, DRIVE_TIMEOUT,
|
||||
CACHE_TTL_SECONDS, CACHE_API_CHECK_TTL_SECONDS, CACHE_SHEET_MAP,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Constants
|
||||
CACHE_DIR = Path(__file__).parent.parent / "tmp"
|
||||
CREDS_PATH = Path(__file__).parent.parent / ".secret" / "fuj-management-bot-credentials.json"
|
||||
DRIVE_TIMEOUT = 10 # seconds
|
||||
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 300)) # 5 min default for max cache age
|
||||
CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default
|
||||
|
||||
# Known mapping of each "cache name" to its Google Sheet ID
|
||||
CACHE_SHEET_MAP = {
|
||||
"attendance_regular": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
|
||||
"attendance_juniors": "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA",
|
||||
"exceptions_dict": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y",
|
||||
"payments_transactions": "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
|
||||
}
|
||||
|
||||
# Global state to track last Drive API check time per sheet
|
||||
_LAST_CHECKED = {}
|
||||
_DRIVE_SERVICE = None
|
||||
@@ -87,7 +75,7 @@ def get_sheet_modified_time(cache_key: str) -> str | None:
|
||||
# 2. Check if the cache file is simply too new (legacy check)
|
||||
if CACHE_TTL_SECONDS > 0 and cache_file.exists():
|
||||
try:
|
||||
file_mtime = os.path.getmtime(cache_file)
|
||||
file_mtime = cache_file.stat().st_mtime
|
||||
if time.time() - file_mtime < CACHE_TTL_SECONDS:
|
||||
with open(cache_file, "r", encoding="utf-8") as f:
|
||||
cache_data = json.load(f)
|
||||
|
||||
39
scripts/config.py
Normal file
39
scripts/config.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Centralized configuration for FUJ management scripts.
|
||||
|
||||
External service IDs, credentials, and tunable parameters.
|
||||
Domain-specific constants (fees, column indices) stay in their respective modules.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Paths
|
||||
PROJECT_ROOT = Path(__file__).parent.parent
|
||||
CREDENTIALS_PATH = Path(os.environ.get(
|
||||
"CREDENTIALS_PATH",
|
||||
str(PROJECT_ROOT / ".secret" / "fuj-management-bot-credentials.json"),
|
||||
))
|
||||
|
||||
# Google Sheets IDs
|
||||
ATTENDANCE_SHEET_ID = "1E2e_gT_K5AwSRCDLDTa2UetZTkHmBOcz0kFbBUNUNBA"
|
||||
PAYMENTS_SHEET_ID = "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
|
||||
|
||||
# Attendance sheet tab GIDs
|
||||
JUNIOR_SHEET_GID = "1213318614"
|
||||
|
||||
# Bank
|
||||
BANK_ACCOUNT = os.environ.get("BANK_ACCOUNT", "CZ8520100000002800359168")
|
||||
|
||||
# Cache settings
|
||||
CACHE_DIR = PROJECT_ROOT / "tmp"
|
||||
DRIVE_TIMEOUT = 10 # seconds
|
||||
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", 300)) # 5 min default
|
||||
CACHE_API_CHECK_TTL_SECONDS = int(os.environ.get("CACHE_API_CHECK_TTL_SECONDS", 300)) # 5 min default
|
||||
|
||||
# Maps cache keys to their source sheet IDs (used by cache_utils)
|
||||
CACHE_SHEET_MAP = {
|
||||
"attendance_regular": ATTENDANCE_SHEET_ID,
|
||||
"attendance_juniors": ATTENDANCE_SHEET_ID,
|
||||
"exceptions_dict": PAYMENTS_SHEET_ID,
|
||||
"payments_transactions": PAYMENTS_SHEET_ID,
|
||||
}
|
||||
@@ -102,7 +102,7 @@ def infer_payments(spreadsheet_id: str, credentials_path: str, dry_run: bool = F
|
||||
member_names = [m[0] for m in members_data]
|
||||
|
||||
# 3. Process rows
|
||||
print("Inffering details for empty rows...")
|
||||
print("Inferring details for empty rows...")
|
||||
updates = []
|
||||
|
||||
for i, row in enumerate(rows[1:], start=2):
|
||||
|
||||
@@ -3,12 +3,15 @@
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta
|
||||
from html.parser import HTMLParser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from attendance import get_members_with_fees
|
||||
from czech_utils import normalize, parse_month_references
|
||||
from sync_fio_to_sheets import get_sheets_service, DEFAULT_SPREADSHEET_ID
|
||||
@@ -203,7 +206,7 @@ def fetch_sheet_data(spreadsheet_id: str, credentials_path: str) -> list[dict]:
|
||||
return -1
|
||||
|
||||
idx_date = get_col_index("Date")
|
||||
idx_amount = get_col_index("Amount")
|
||||
idx_amount = get_col_index("Amount")
|
||||
idx_manual = get_col_index("manual fix")
|
||||
idx_person = get_col_index("Person")
|
||||
idx_purpose = get_col_index("Purpose")
|
||||
@@ -212,6 +215,11 @@ def fetch_sheet_data(spreadsheet_id: str, credentials_path: str) -> list[dict]:
|
||||
idx_message = get_col_index("Message")
|
||||
idx_bank_id = get_col_index("Bank ID")
|
||||
|
||||
required = {"Date": idx_date, "Amount": idx_amount, "Person": idx_person, "Purpose": idx_purpose}
|
||||
missing = [name for name, idx in required.items() if idx == -1]
|
||||
if missing:
|
||||
raise ValueError(f"Required columns missing from payments sheet: {', '.join(missing)}. Found headers: {header}")
|
||||
|
||||
transactions = []
|
||||
for row in rows[1:]:
|
||||
def get_val(idx):
|
||||
@@ -381,12 +389,13 @@ def reconcile(
|
||||
per_allocation = amount / num_allocations if num_allocations > 0 else 0
|
||||
|
||||
for member_name, confidence in matched_members:
|
||||
# If we matched via sheet 'Person' column, name might be partial or have markers
|
||||
# but usually it's the exact member name from get_members_with_fees.
|
||||
# Let's ensure it exists in our ledger.
|
||||
if member_name not in ledger:
|
||||
# Try matching by base name if it was Jan Novak (Kačerr) etc.
|
||||
pass
|
||||
logger.warning(
|
||||
"Payment matched to unknown member %r (tx: %s, %s) — adding to unmatched",
|
||||
member_name, tx.get("date", "?"), tx.get("message", "?"),
|
||||
)
|
||||
unmatched.append(tx)
|
||||
continue
|
||||
|
||||
for month_key in matched_months:
|
||||
entry = {
|
||||
@@ -396,7 +405,7 @@ def reconcile(
|
||||
"message": tx["message"],
|
||||
"confidence": confidence,
|
||||
}
|
||||
if month_key in ledger.get(member_name, {}):
|
||||
if month_key in ledger[member_name]:
|
||||
ledger[member_name][month_key]["paid"] += per_allocation
|
||||
ledger[member_name][month_key]["transactions"].append(entry)
|
||||
else:
|
||||
|
||||
@@ -14,8 +14,7 @@ from googleapiclient.discovery import build
|
||||
|
||||
from fio_utils import fetch_transactions
|
||||
|
||||
# Configuration
|
||||
DEFAULT_SPREADSHEET_ID = "1Om0YPoDVCH5cV8BrNz5LG5eR5MMU05ypQC7UMN1xn_Y"
|
||||
from config import PAYMENTS_SHEET_ID as DEFAULT_SPREADSHEET_ID
|
||||
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
|
||||
TOKEN_FILE = "token.pickle"
|
||||
COLUMN_LABELS = ["Date", "Amount", "manual fix", "Person", "Purpose", "Inferred Amount", "Sender", "VS", "Message", "Bank ID", "Sync ID"]
|
||||
|
||||
Reference in New Issue
Block a user