Files
fuj-management/scripts/infer_payments.py
Jan Novak c5a8a4e7b1
Some checks failed
Deploy to K8s / deploy (push) Successful in 10s
Build and Push / build (push) Successful in 6s
Build and Push / build-go (push) Failing after 12m23s
fix: include juniors in payment-inference roster
infer_payments was building member_names from get_members_with_fees()
(adults sheet only). Junior-only members were invisible to the matcher,
so a payment message containing an exact junior name would produce a
fuzzy review match against a different adult sharing the same first name.

Fix: union the adult and junior rosters (deduped via canonical_member_key)
so all members are candidates. The existing exact-name short-circuit in
match_members then handles precedence correctly.

Two regression tests added for the Jáchym Kubík / Jáchym Hrušák case.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-06 16:38:21 +02:00

201 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""Infer 'Person', 'Purpose', and 'Amount' for transactions in Google Sheets."""
import argparse
import os
import sys
from datetime import datetime
# Add the current directory to sys.path to import local modules
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from googleapiclient.discovery import build
from sync_fio_to_sheets import get_sheets_service, DEFAULT_SPREADSHEET_ID
from match_payments import infer_transaction_details, canonical_member_key
from attendance import get_members_with_fees, get_junior_members_with_fees
def parse_czk_amount(val) -> float:
    """Parse a Czech-formatted currency value into a float.

    Numbers (``int``/``float``) are returned as ``float`` unchanged.
    Strings may carry a currency marker ("Kč" or "CZK") and use spaces
    (including non-breaking spaces) or dots as thousand separators, e.g.
    ``"1 500,00 Kč"``, ``"1.500,00"``, ``"1 500.00 CZK"``, ``"1.500.000"``.

    Separator heuristic:
      * if a comma is present it is the decimal separator, and every dot
        is a thousand separator;
      * otherwise several dots are thousand separators, while a single
        dot is kept as the decimal point.

    Returns:
        The parsed amount, or ``0.0`` for ``None``, an empty string, or
        text that cannot be parsed as a number.
    """
    if val is None or val == "":
        return 0.0
    if isinstance(val, (int, float)):
        return float(val)

    text = str(val)

    # Strip the currency marker. "K\u010d" is "Kč"; it is written as an
    # escape so the literal survives tools that mangle non-ASCII text.
    for marker in ("K\u010d", "CZK"):
        text = text.replace(marker, "")

    # Drop every whitespace character. str.split() with no argument also
    # splits on non-breaking spaces (U+00A0, U+202F), which Czech number
    # formatting commonly uses as the thousand separator.
    text = "".join(text.split())

    if "," in text:
        # 1.500,00 -> 1500.00
        text = text.replace(".", "").replace(",", ".")
    elif text.count(".") > 1:
        # 1.500.000 -> 1500000
        text = text.replace(".", "")
    # A single dot without a comma is treated as the decimal point.

    try:
        return float(text)
    except ValueError:
        return 0.0
# Header labels of the sheet columns used by infer_payments (names as
# requested by the user). Headers are looked up case-insensitively and
# with surrounding whitespace ignored.
# Optional column: a row with any value here is skipped by the inference.
COL_MANUAL = "manual fix"
# Required output column: matched member names ("[?] " marks a match that
# needs review).
COL_PERSON = "Person"
# Required output column: the matched months, comma-separated.
COL_PURPOSE = "Purpose"
# Required output column: the amount attributed to the inferred payment.
COL_AMOUNT = "Inferred Amount"
def infer_payments(spreadsheet_id: str, credentials_path: str, dry_run: bool = False):
    """Fill the Person / Purpose / Inferred Amount columns of the payments sheet.

    Reads range ``A1:Z`` of the spreadsheet, and for every data row whose
    "manual fix", "Person" and "Purpose" cells are all empty runs
    ``infer_transaction_details`` against the combined adult + junior
    member roster. Rows for which at least one member or month was
    matched are written back in a single ``batchUpdate`` call.

    Args:
        spreadsheet_id: ID of the Google spreadsheet to process.
        credentials_path: Path passed to ``get_sheets_service``.
        dry_run: When true, only print what would be written.

    Returns:
        None. Progress and errors are reported on stdout; the function
        returns early when the sheet is empty, when a required output
        column is missing, or when there is nothing to update.
    """
    print("Connecting to Google Sheets...")
    service = get_sheets_service(credentials_path)
    sheet = service.spreadsheets()

    # 1. Fetch all data from the sheet
    print("Reading sheet data...")
    result = sheet.values().get(
        spreadsheetId=spreadsheet_id,
        range="A1:Z",  # Read a broad range to find existing columns
        valueRenderOption="UNFORMATTED_VALUE",
    ).execute()
    rows = result.get("values", [])
    if not rows:
        print("Sheet is empty.")
        return

    header = rows[0]

    def get_col_index(label):
        """Return the 0-based index of the header cell matching *label*, or -1."""
        normalized_label = label.lower().strip()
        for i, h in enumerate(header):
            # str(): with UNFORMATTED_VALUE a numeric header cell arrives as a
            # number, which has no .lower().
            if str(h).lower().strip() == normalized_label:
                return i
        return -1

    def cell(row, idx):
        """Return row[idx], or "" when the column is absent or beyond the row."""
        return row[idx] if idx != -1 and idx < len(row) else ""

    # Input columns (all optional; a missing one reads as "").
    idx_date = get_col_index("Date")
    idx_amount_raw = get_col_index("Amount")  # Bank Amount
    idx_sender = get_col_index("Sender")
    idx_message = get_col_index("Message")
    idx_vs = get_col_index("VS")

    # Output columns (required) and the optional manual-override column.
    idx_manual = get_col_index(COL_MANUAL)
    idx_inferred_person = get_col_index(COL_PERSON)
    idx_inferred_purpose = get_col_index(COL_PURPOSE)
    idx_inferred_amount = get_col_index(COL_AMOUNT)

    if -1 in (idx_inferred_person, idx_inferred_purpose, idx_inferred_amount):
        required_labels = [COL_PERSON, COL_PURPOSE, COL_AMOUNT]
        print(f"Error: Required columns {required_labels} not found in sheet.")
        print(f"Current header: {header}")
        return

    # A single three-cell range per row is only valid when the output columns
    # sit next to each other in Person, Purpose, Amount order.
    columns_contiguous = (
        idx_inferred_purpose == idx_inferred_person + 1
        and idx_inferred_amount == idx_inferred_person + 2
    )

    # 2. Fetch members for matching — union adults + juniors so junior-only
    #    members (e.g. kids not on the adult sheet) are visible to the matcher.
    print("Fetching member list for matching...")
    adult_members, _ = get_members_with_fees()
    junior_members, _ = get_junior_members_with_fees()
    seen: set[str] = set()
    member_names: list[str] = []
    for m in adult_members + junior_members:
        # The first element of each member record is used as the name.
        key = canonical_member_key(m[0])
        if key not in seen:
            seen.add(key)
            member_names.append(m[0])

    # 3. Process rows
    print("Inferring details for empty rows...")
    updates = []
    rows_changed = 0

    # start=2: sheet rows are 1-based and row 1 is the header.
    for i, row in enumerate(rows[1:], start=2):
        # Pad short rows so every header column can be indexed and assigned.
        row.extend([""] * (len(header) - len(row)))

        # Skip rows that are already filled in (manual override or earlier run).
        val_manual = str(cell(row, idx_manual))
        val_person = str(cell(row, idx_inferred_person))
        val_purpose = str(cell(row, idx_inferred_purpose))
        if val_manual.strip() or val_person.strip() or val_purpose.strip():
            continue

        # Prepare transaction dict for matching logic
        raw_amount = cell(row, idx_amount_raw)
        tx = {
            "date": cell(row, idx_date),
            "amount": parse_czk_amount(raw_amount) if raw_amount else 0,
            "sender": cell(row, idx_sender),
            "message": cell(row, idx_message),
            "vs": cell(row, idx_vs),
        }

        inference = infer_transaction_details(tx, member_names)

        # Prefix matches that need human review with a "[?] " marker.
        peeps = [
            f"{'[?] ' if conf == 'review' else ''}{name}"
            for name, conf in inference["members"]
        ]
        matched_months = inference["months"]

        if not (peeps or matched_months):
            continue

        person_val = ", ".join(peeps)
        purpose_val = ", ".join(matched_months)
        amount_val = str(tx["amount"])  # For now, use total amount

        print(f"Row {i}: Inferred {person_val} for {purpose_val} ({amount_val} CZK)")

        # Update the row in memory (for terminal output/dry run)
        row[idx_inferred_person] = person_val
        row[idx_inferred_purpose] = purpose_val
        row[idx_inferred_amount] = amount_val

        # Prepare batch update. Ranges use R1C1 notation without a sheet name.
        # NOTE(review): presumably resolved against the first visible sheet,
        # like the "A1:Z" read above — confirm.
        if columns_contiguous:
            updates.append({
                "range": f"R{i}C{idx_inferred_person+1}:R{i}C{idx_inferred_amount+1}",
                "values": [[person_val, purpose_val, amount_val]],
            })
        else:
            # Columns are scattered or reordered: address each cell on its own
            # so every value lands under its own header.
            for col, value in (
                (idx_inferred_person, person_val),
                (idx_inferred_purpose, purpose_val),
                (idx_inferred_amount, amount_val),
            ):
                updates.append({
                    "range": f"R{i}C{col+1}:R{i}C{col+1}",
                    "values": [[value]],
                })
        rows_changed += 1

    if not updates:
        print("No new inferences to make.")
        return

    if dry_run:
        print(f"Dry run: would update {rows_changed} rows.")
    else:
        print(f"Applying {rows_changed} updates to the sheet...")
        body = {
            "valueInputOption": "USER_ENTERED",
            "data": updates,
        }
        sheet.values().batchUpdate(
            spreadsheetId=spreadsheet_id,
            body=body,
        ).execute()
        print("Update completed successfully.")
def main():
    """Command-line entry point: parse arguments and run the inference.

    Options:
        --sheet-id     Google Sheet ID (defaults to DEFAULT_SPREADSHEET_ID).
        --credentials  Path to the Google API credentials JSON.
        --dry-run      Print the updates without applying them.

    Any exception raised by ``infer_payments`` is reported with a traceback
    and the process exits with status 1, so shells, cron jobs and CI can
    detect the failure.
    """
    parser = argparse.ArgumentParser(description="Infer payment details in Google Sheets.")
    parser.add_argument("--sheet-id", default=DEFAULT_SPREADSHEET_ID, help="Google Sheet ID")
    parser.add_argument("--credentials", default="credentials.json", help="Path to Google API credentials JSON")
    parser.add_argument("--dry-run", action="store_true", help="Print updates without applying them")
    args = parser.parse_args()

    try:
        infer_payments(args.sheet_id, args.credentials, args.dry_run)
    except Exception as e:
        # Top-level boundary: report the error, then signal failure to the caller.
        print(f"Inference failed: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()