feat: implement automated payment inference and sync to Google Sheets

This commit is contained in:
Jan Novak
2026-03-02 14:29:45 +01:00
parent 65e40d116b
commit d719383c9c
10 changed files with 1520 additions and 264 deletions

191
scripts/infer_payments.py Normal file
View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python3
"""Infer 'Person', 'Purpose', and 'Amount' for transactions in Google Sheets."""
import argparse
import os
import sys
from datetime import datetime
# Add this script's own directory to sys.path so sibling modules can be imported
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from googleapiclient.discovery import build
from sync_fio_to_sheets import get_sheets_service, DEFAULT_SPREADSHEET_ID
from match_payments import infer_transaction_details
from attendance import get_members_with_fees
def parse_czk_amount(val) -> float:
    """Parse a Czech-formatted currency value into a float.

    Numeric inputs (``int``/``float``) are returned as ``float`` unchanged.
    Anything else is converted with ``str()`` and cleaned up:

    * the currency markers "Kc" (with caron) and "CZK" are removed,
    * every whitespace character is removed, including the non-breaking
      spaces commonly used as thousand separators,
    * if a comma is present it is the decimal separator and dots are
      thousand separators (``1.500,00`` -> ``1500.00``),
    * without a comma, several dots are thousand separators
      (``1.500.000`` -> ``1500000``) while a single dot is kept as the
      decimal separator (``1500.50``).

    Returns:
        The parsed amount, or ``0.0`` for ``None``, an empty string, or
        text that still cannot be parsed as a number after cleanup.
    """
    if val is None or val == "":
        return 0.0
    if isinstance(val, (int, float)):
        return float(val)

    text = str(val)

    # "K\u010d" is the Czech koruna symbol (K followed by c-with-caron).
    # It is spelled with an escape so the literal survives encoding mishaps.
    for marker in ("K\u010d", "CZK"):
        text = text.replace(marker, "")

    # str.split() without arguments splits on any Unicode whitespace, so this
    # also drops non-breaking (U+00A0) and narrow non-breaking (U+202F)
    # spaces, not only plain ASCII spaces.
    text = "".join(text.split())

    if "," in text:
        # Comma is the decimal separator; dots can only be thousand separators.
        text = text.replace(".", "").replace(",", ".")
    elif text.count(".") > 1:
        # More than one dot cannot be a decimal point: thousand separators.
        text = text.replace(".", "")
    # Otherwise a single dot (if any) is treated as the decimal separator.

    try:
        return float(text)
    except ValueError:
        return 0.0
# Header labels of the sheet columns used by infer_payments()
# (column names as requested by the user). Header lookup there is
# case-insensitive and ignores surrounding whitespace.
COL_MANUAL = "manual fix"  # optional column; rows with any value here are skipped
COL_PERSON = "Person"  # required; receives the inferred member name(s)
COL_PURPOSE = "Purpose"  # required; receives the inferred month(s)
COL_AMOUNT = "Inferred Amount"  # required; receives the transaction amount
def _find_column(header: list, label: str) -> int:
    """Return the 0-based index of the header cell matching *label*, or -1."""
    wanted = label.lower().strip()
    for position, cell in enumerate(header):
        # The sheet is read with UNFORMATTED_VALUE, so a header cell may be a
        # number or boolean rather than a string; str() keeps this safe.
        if str(cell).lower().strip() == wanted:
            return position
    return -1


def _cell_or_blank(row: list, index: int):
    """Return ``row[index]``, or "" when the column is absent or the row is short."""
    if 0 <= index < len(row):
        return row[index]
    return ""


def infer_payments(spreadsheet_id: str, credentials_path: str, dry_run: bool = False):
    """Fill the Person / Purpose / Inferred Amount columns for unprocessed rows.

    Reads ``A1:Z`` of the spreadsheet, treats the first row as the header and,
    for every following row whose "manual fix", "Person" and "Purpose" cells
    are all blank, asks ``infer_transaction_details`` for matching members and
    months. Rows with at least one match are written back in a single
    ``values.batchUpdate`` call (skipped when *dry_run* is true).

    Args:
        spreadsheet_id: ID of the Google spreadsheet to process.
        credentials_path: Path passed to ``get_sheets_service``.
        dry_run: When true, only print what would be updated.

    Returns:
        None. Progress and results are reported on stdout. The function
        returns early if the sheet is empty, a required column is missing,
        or nothing could be inferred.
    """
    print("Connecting to Google Sheets...")
    service = get_sheets_service(credentials_path)
    sheet = service.spreadsheets()

    # 1. Fetch all data from the sheet
    print("Reading sheet data...")
    result = sheet.values().get(
        spreadsheetId=spreadsheet_id,
        range="A1:Z",  # Read a broad range to find existing columns
        valueRenderOption="UNFORMATTED_VALUE",
    ).execute()
    rows = result.get("values", [])
    if not rows:
        print("Sheet is empty.")
        return

    header = rows[0]

    # Source columns (each optional; a missing one yields "" per row).
    idx_date = _find_column(header, "Date")
    idx_amount_raw = _find_column(header, "Amount")  # Bank amount
    idx_sender = _find_column(header, "Sender")
    idx_message = _find_column(header, "Message")
    idx_vs = _find_column(header, "VS")

    # Target columns; the manual-fix column is optional, the rest are required.
    idx_manual = _find_column(header, COL_MANUAL)
    idx_inferred_person = _find_column(header, COL_PERSON)
    idx_inferred_purpose = _find_column(header, COL_PURPOSE)
    idx_inferred_amount = _find_column(header, COL_AMOUNT)

    required_labels = [COL_PERSON, COL_PURPOSE, COL_AMOUNT]
    if -1 in (idx_inferred_person, idx_inferred_purpose, idx_inferred_amount):
        print(f"Error: Required columns {required_labels} not found in sheet.")
        print(f"Current header: {header}")
        return

    # 2. Fetch members for matching
    print("Fetching member list for matching...")
    members_data, _ = get_members_with_fees()
    member_names = [m[0] for m in members_data]

    # 3. Process rows
    print("Inferring details for empty rows...")
    updates = []
    rows_to_update = 0
    # start=2: sheet rows are 1-based and row 1 is the header.
    for row_number, row in enumerate(rows[1:], start=2):
        # Skip rows that were already filled in or manually overridden.
        already_filled = any(
            str(_cell_or_blank(row, idx)).strip()
            for idx in (idx_manual, idx_inferred_person, idx_inferred_purpose)
        )
        if already_filled:
            continue

        # Prepare transaction dict for the matching logic
        raw_amount = _cell_or_blank(row, idx_amount_raw)
        tx = {
            "date": _cell_or_blank(row, idx_date),
            "amount": parse_czk_amount(raw_amount) if raw_amount else 0,
            "sender": _cell_or_blank(row, idx_sender),
            "message": _cell_or_blank(row, idx_message),
            "vs": _cell_or_blank(row, idx_vs),
        }

        inference = infer_transaction_details(tx, member_names)

        # Keep the order returned by the matcher; flag low-confidence
        # ("review") matches with a "[?] " marker.
        peeps = [
            f"{'[?] ' if conf == 'review' else ''}{name}"
            for name, conf in inference["members"]
        ]
        matched_months = inference["months"]
        if not (peeps or matched_months):
            continue

        person_val = ", ".join(peeps)
        purpose_val = ", ".join(matched_months)
        # For now, use the total amount. It is sent as a number rather than a
        # string so that USER_ENTERED does not re-parse it using the sheet's
        # locale (e.g. decimal comma).
        amount_val = tx["amount"]

        print(f"Row {row_number}: Inferred {person_val} for {purpose_val} ({amount_val} CZK)")

        # One single-cell range per target column: the three columns are not
        # guaranteed to be adjacent or in Person/Purpose/Amount order, so a
        # single spanning range could overwrite unrelated cells or be rejected.
        for col_index, value in (
            (idx_inferred_person, person_val),
            (idx_inferred_purpose, purpose_val),
            (idx_inferred_amount, amount_val),
        ):
            updates.append({
                "range": f"R{row_number}C{col_index + 1}",
                "values": [[value]],
            })
        rows_to_update += 1

    if not updates:
        print("No new inferences to make.")
        return

    if dry_run:
        print(f"Dry run: would update {rows_to_update} rows.")
        return

    print(f"Applying {rows_to_update} updates to the sheet...")
    body = {
        "valueInputOption": "USER_ENTERED",
        "data": updates,
    }
    sheet.values().batchUpdate(
        spreadsheetId=spreadsheet_id,
        body=body,
    ).execute()
    print("Update completed successfully.")
def main():
    """Command-line entry point: parse arguments and run the inference.

    Options:
        --sheet-id      Google Sheet ID (defaults to DEFAULT_SPREADSHEET_ID).
        --credentials   Path to the Google API credentials JSON.
        --dry-run       Print updates without applying them.

    Any exception raised by ``infer_payments`` is reported with a traceback
    and the process exits with status 1, so shells, cron jobs and CI can
    detect the failure instead of seeing a successful exit.
    """
    parser = argparse.ArgumentParser(description="Infer payment details in Google Sheets.")
    parser.add_argument("--sheet-id", default=DEFAULT_SPREADSHEET_ID, help="Google Sheet ID")
    parser.add_argument("--credentials", default="credentials.json", help="Path to Google API credentials JSON")
    parser.add_argument("--dry-run", action="store_true", help="Print updates without applying them")
    args = parser.parse_args()

    try:
        infer_payments(args.sheet_id, args.credentials, args.dry_run)
    except Exception as e:
        # Top-level boundary: report everything, then signal failure.
        import traceback

        print(f"Inference failed: {e}")
        traceback.print_exc()
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()