fuj-management/scripts/infer_payments.py

#!/usr/bin/env python3
"""Infer 'Person', 'Purpose', and 'Amount' for transactions in Google Sheets."""

import argparse
import os
import sys
from datetime import datetime

# Add the current directory to sys.path to import local modules
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from googleapiclient.discovery import build
from sync_fio_to_sheets import get_sheets_service, DEFAULT_SPREADSHEET_ID
from match_payments import infer_transaction_details, canonical_member_key
from attendance import get_members_with_fees, get_junior_members_with_fees

def parse_czk_amount(val) -> float:
    """Parse a Czech-formatted currency string, or pass through a raw number.

    Accepted inputs:
      * ``None`` or ``""``  -> ``0.0``
      * ``int`` / ``float`` -> converted with ``float()``
      * strings such as ``"1 500,00 Kč"``, ``"1.500,00"``, ``"1 500.00"``,
        ``"1.234.567"`` or ``"250 CZK"``

    Any whitespace character is treated as a thousands separator. This
    includes the no-break space (U+00A0) and narrow no-break space (U+202F)
    that Czech number formatting commonly uses; ``float()`` rejects them when
    they appear between digits.

    Args:
        val: The cell value to parse (string, number or ``None``).

    Returns:
        The parsed amount, or ``0.0`` when the value is empty or cannot be
        parsed as a number.
    """
    if val is None or val == "":
        return 0.0
    if isinstance(val, (int, float)):
        return float(val)

    text = str(val)
    # Strip currency markers.
    text = text.replace("Kč", "").replace("CZK", "")
    # Normalise the typographic minus sign (U+2212), which float() rejects.
    text = text.replace("\u2212", "-")
    # Drop ALL whitespace (str.split() with no argument splits on every
    # Unicode whitespace character, so NBSP / narrow NBSP are covered too).
    text = "".join(text.split())

    if "," in text:
        # A comma is the decimal separator, so dots can only be thousands
        # separators: 1.500,00 -> 1500.00
        text = text.replace(".", "").replace(",", ".")
    elif text.count(".") > 1:
        # Several dots cannot all be decimal points: 1.234.567 -> 1234567
        text = text.replace(".", "")
    # Otherwise a single dot (if any) is kept as the decimal separator.

    try:
        return float(text)
    except ValueError:
        return 0.0

# Header labels of the sheet columns used to record inference results.
# They are looked up in the header row case-insensitively and with
# surrounding whitespace ignored (see get_col_index in infer_payments).
COL_MANUAL = "manual fix"  # optional column; a non-empty cell makes the row be skipped
COL_PERSON = "Person"  # required; receives the matched member name(s)
COL_PURPOSE = "Purpose"  # required; receives the matched month(s)
COL_AMOUNT = "Inferred Amount"  # required; receives the parsed bank amount

def infer_payments(spreadsheet_id: str, credentials_path: str, dry_run: bool = False):
    """Fill in 'Person', 'Purpose' and 'Inferred Amount' for unprocessed rows.

    The function reads columns A-Z of the spreadsheet, treats the first row
    as the header, and for every data row whose 'manual fix', 'Person' and
    'Purpose' cells are all empty it runs ``infer_transaction_details``
    against the combined adult + junior member list. Rows for which at least
    one member or month was inferred are written back in a single
    ``values().batchUpdate`` call.

    Args:
        spreadsheet_id: ID of the Google Sheet to read and update.
        credentials_path: Credentials path handed to ``get_sheets_service``.
        dry_run: When true, print the inferred values but do not write
            anything back to the sheet.

    Returns:
        None. Progress and results are reported via ``print``. The function
        returns early (without writing) when the sheet is empty, when one of
        the three required result columns is missing, or when nothing new
        was inferred.
    """
    print("Connecting to Google Sheets...")
    service = get_sheets_service(credentials_path)
    sheet = service.spreadsheets()

    # 1. Fetch all data from the sheet
    print("Reading sheet data...")
    result = sheet.values().get(
        spreadsheetId=spreadsheet_id,
        range="A1:Z",  # Read a broad range to find existing columns
        valueRenderOption="UNFORMATTED_VALUE"
    ).execute()
    rows = result.get("values", [])
    if not rows:
        print("Sheet is empty.")
        return

    header = rows[0]

    def get_col_index(label):
        """Return the 0-based index of the header cell matching *label*, or -1."""
        normalized_label = label.lower().strip()
        for i, h in enumerate(header):
            # With UNFORMATTED_VALUE a header cell may come back as a number
            # or boolean, so coerce to str before normalising.
            if str(h).lower().strip() == normalized_label:
                return i
        return -1

    def cell(row, idx):
        """Return row[idx], or "" when the column is absent or out of range."""
        return row[idx] if idx != -1 and idx < len(row) else ""

    # Input columns (all optional; missing ones are treated as empty).
    idx_date = get_col_index("Date")
    idx_amount_raw = get_col_index("Amount")  # Bank Amount
    idx_sender = get_col_index("Sender")
    idx_message = get_col_index("Message")
    idx_vs = get_col_index("VS")

    target_labels = [COL_MANUAL, COL_PERSON, COL_PURPOSE, COL_AMOUNT]

    # Output columns ('manual fix' is optional, the other three are required).
    idx_manual = get_col_index(COL_MANUAL)
    idx_inferred_person = get_col_index(COL_PERSON)
    idx_inferred_purpose = get_col_index(COL_PURPOSE)
    idx_inferred_amount = get_col_index(COL_AMOUNT)

    if idx_inferred_person == -1 or idx_inferred_purpose == -1 or idx_inferred_amount == -1:
        print(f"Error: Required columns {target_labels[1:]} not found in sheet.")
        print(f"Current header: {header}")
        return

    # A single three-cell range is only correct when the result columns sit
    # next to each other in the order Person, Purpose, Inferred Amount.
    # For any other layout each cell is addressed individually so values can
    # never land in the wrong column.
    target_cols = (idx_inferred_person, idx_inferred_purpose, idx_inferred_amount)
    columns_contiguous = (
        idx_inferred_purpose == idx_inferred_person + 1
        and idx_inferred_amount == idx_inferred_person + 2
    )

    # 2. Fetch members for matching — union adults + juniors so junior-only
    # members (e.g. kids not on the adult sheet) are visible to the matcher.
    print("Fetching member list for matching...")
    adult_members, _ = get_members_with_fees()
    junior_members, _ = get_junior_members_with_fees()

    # De-duplicate by canonical key, keeping the first spelling encountered.
    seen: set[str] = set()
    member_names: list[str] = []
    for m in adult_members + junior_members:
        key = canonical_member_key(m[0])
        if key not in seen:
            seen.add(key)
            member_names.append(m[0])

    # 3. Process rows
    print("Inferring details for empty rows...")
    updates = []
    updated_rows = 0

    # start=2: the header is sheet row 1, so the first data row is row 2.
    for i, row in enumerate(rows[1:], start=2):
        # The API omits trailing empty cells; pad so header indices are valid.
        while len(row) < len(header):
            row.append("")

        # Skip rows that were already filled in (manually or by a prior run).
        val_manual = str(cell(row, idx_manual))
        val_person = str(cell(row, idx_inferred_person))
        val_purpose = str(cell(row, idx_inferred_purpose))

        if val_manual.strip() or val_person.strip() or val_purpose.strip():
            continue

        # Prepare transaction dict for matching logic
        raw_amount = cell(row, idx_amount_raw)
        tx = {
            "date": cell(row, idx_date),
            "amount": parse_czk_amount(raw_amount) if raw_amount else 0,
            "sender": cell(row, idx_sender),
            "message": cell(row, idx_message),
            "vs": cell(row, idx_vs),
        }

        inference = infer_transaction_details(tx, member_names)

        # Prefix matches flagged for review with "[?] "; order is kept as
        # returned by the matcher.
        peeps = []
        for name, conf in inference["members"]:
            prefix = "[?] " if conf == "review" else ""
            peeps.append(f"{prefix}{name}")

        matched_months = inference["months"]

        if peeps or matched_months:
            person_val = ", ".join(peeps)
            purpose_val = ", ".join(matched_months)
            amount_val = str(tx["amount"])  # For now, use total amount

            print(f"Row {i}: Inferred {person_val} for {purpose_val} ({amount_val} CZK)")

            # Update the row in memory (for terminal output/dry run)
            row[idx_inferred_person] = person_val
            row[idx_inferred_purpose] = purpose_val
            row[idx_inferred_amount] = amount_val

            # Prepare batch update (R1C1 notation, 1-based column numbers)
            if columns_contiguous:
                updates.append({
                    "range": f"R{i}C{idx_inferred_person+1}:R{i}C{idx_inferred_amount+1}",
                    "values": [[person_val, purpose_val, amount_val]]
                })
            else:
                for col, value in zip(target_cols, (person_val, purpose_val, amount_val)):
                    updates.append({
                        "range": f"R{i}C{col+1}",
                        "values": [[value]]
                    })
            updated_rows += 1

    if not updates:
        print("No new inferences to make.")
        return

    if dry_run:
        print(f"Dry run: would update {updated_rows} rows.")
    else:
        print(f"Applying {updated_rows} updates to the sheet...")
        body = {
            "valueInputOption": "USER_ENTERED",
            "data": updates
        }
        sheet.values().batchUpdate(
            spreadsheetId=spreadsheet_id,
            body=body
        ).execute()
        print("Update completed successfully.")

def main() -> int:
    """Parse command-line arguments and run the inference.

    Options:
        --sheet-id: Google Sheet ID (defaults to ``DEFAULT_SPREADSHEET_ID``).
        --credentials: Path to the Google API credentials JSON
            (defaults to ``credentials.json``).
        --dry-run: Print the inferred updates without applying them.

    Returns:
        ``0`` when ``infer_payments`` finished without raising, ``1`` when it
        raised. The error and its traceback are printed in the failure case.
    """
    parser = argparse.ArgumentParser(description="Infer payment details in Google Sheets.")
    parser.add_argument("--sheet-id", default=DEFAULT_SPREADSHEET_ID, help="Google Sheet ID")
    parser.add_argument("--credentials", default="credentials.json", help="Path to Google API credentials JSON")
    parser.add_argument("--dry-run", action="store_true", help="Print updates without applying them")
    args = parser.parse_args()

    try:
        infer_payments(args.sheet_id, args.credentials, args.dry_run)
    except Exception as e:
        # Top-level boundary: report the failure with full context, then
        # signal it through the exit status instead of ending with 0.
        print(f"Inference failed: {e}")
        import traceback
        traceback.print_exc()
        return 1
    return 0

if __name__ == "__main__":
    # Propagate the result as the process exit status so shells, cron and CI
    # can detect a failed run.
    sys.exit(main())