#!/usr/bin/env python3 """Capture pure-function output as JSON fixtures for parity testing. Each invocation emits exactly one JSON object to stdout. Pipe through scrub_fixtures.py before writing to go/tests/fixtures/. Usage: # Single case: python capture_fixtures.py --func normalize --case simple_ascii \\ --input-seed simple_ascii | python scrub_fixtures.py \\ > go/tests/fixtures/pure/normalize/simple_ascii.json # All seeds for a function (newline-delimited JSON, one object per line): python capture_fixtures.py --func normalize --all # Feed input from stdin (for ad-hoc cases): echo '{"text":"hello"}' | python capture_fixtures.py --func normalize \\ --case adhoc --input-stdin See scripts/_fixture_seeds.py for the seed registry. """ from __future__ import annotations import argparse import json import sys import os import datetime sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from czech_utils import normalize, parse_month_references from attendance import calculate_fee, calculate_junior_fee from infer_payments import parse_czk_amount from sync_fio_to_sheets import generate_sync_id as _py_generate_sync_id from match_payments import ( _build_name_variants, match_members, infer_transaction_details, format_date, reconcile, ) from czech_utils import normalize as _norm import _fixture_seeds as seeds # --------------------------------------------------------------------------- # Type-envelope helpers # --------------------------------------------------------------------------- def _decode_envelope(envelope): """Convert a {type, value} envelope to a Python value for function calls.""" if not isinstance(envelope, dict): return envelope t = envelope.get("type", "raw") v = envelope.get("value") if t == "none": return None if t == "int": return int(v) if t == "float": return float(v) if t == "string": return v return v # raw JSON value (for fields that don't use an envelope) # --------------------------------------------------------------------------- # Per-function capture implementations # --------------------------------------------------------------------------- def capture_normalize(inp: dict) -> dict: result = normalize(inp["text"]) return {"text": result} def capture_parse_month_references(inp: dict) -> dict: result = parse_month_references(inp["text"], inp.get("default_year", 2026)) return {"months": result} def capture_calculate_fee(inp: dict) -> dict: result = calculate_fee(inp["attendance_count"], inp["month_key"]) return {"fee": result} def capture_calculate_junior_fee(inp: dict) -> dict: raw = calculate_junior_fee(inp["attendance_count"], inp["month_key"]) if raw == "?": return {"value": 0, "unknown": True} return {"value": int(raw), "unknown": False} def capture_parse_czk_amount(inp: dict) -> dict: val = _decode_envelope(inp["val"]) result = parse_czk_amount(val) return {"amount": float(result)} def capture_generate_sync_id(inp: dict) -> dict: tx_in = inp["tx"] # Build the tx dict that generate_sync_id expects: # amount must be the Python-native type to replicate str(amount) faithfully. tx = {k: v for k, v in tx_in.items() if k != "amount"} tx["amount"] = _decode_envelope(tx_in["amount"]) result = _py_generate_sync_id(tx) return {"sync_id": result} def capture_build_name_variants(inp: dict) -> dict: result = _build_name_variants(inp["full_name"]) return {"variants": result} def capture_match_members(inp: dict) -> dict: matches = match_members(inp["text"], inp["member_names"]) return { "matches": [{"name": name, "confidence": conf} for name, conf in matches] } def capture_infer_transaction_details(inp: dict) -> dict: tx_in = inp["tx"] tx = dict(tx_in) tx["date"] = _decode_envelope(tx_in.get("date")) result = infer_transaction_details(tx, inp["member_names"]) return { "matches": [{"name": n, "confidence": c} for n, c in result["members"]], "months": result["months"], "search_text": result.get("search_text", result.get("matched_text", "")), } def capture_format_date(inp: dict) -> dict: val = _decode_envelope(inp["val"]) result = format_date(val) return {"date": result} def _build_exceptions(exc_list): """Convert seed exceptions to the dict reconcile() expects. Accepts both the legacy list format [name, period, amount, note] and the new dict format {"name": ..., "period": ..., "amount": ..., "note": ...}.""" if not exc_list: return {} result = {} for row in exc_list: if isinstance(row, dict): name = row.get("name", "") period = row.get("period", "") amount = row.get("amount", 0) note = row.get("note", "") else: name, period, amount = row[0], row[1], row[2] note = row[3] if len(row) > 3 else "" result[(_norm(name), _norm(period))] = {"amount": int(amount), "note": note} return result def _member_fee_dict(fees_raw: dict) -> dict: """Convert seed fees dict to the form reconcile() expects.""" # Seeds store fees as [fee, count] lists (JSON) or (fee, count) tuples. result = {} for month, v in fees_raw.items(): if isinstance(v, (list, tuple)) and len(v) == 2: result[month] = (int(v[0]), int(v[1])) else: result[month] = int(v) return result def _tx_entry_out(tx): """Convert a reconcile output TxEntry dict to a serializable form.""" return { "amount": float(tx.get("amount", 0)), "date": tx.get("date", ""), "sender": tx.get("sender", ""), "message": tx.get("message", ""), "confidence": tx.get("confidence", ""), } def _other_entry_out(e): return { "amount": float(e.get("amount", 0)), "date": e.get("date", ""), "sender": e.get("sender", ""), "message": e.get("message", ""), "purpose": e.get("purpose", ""), "confidence": e.get("confidence", ""), } def _month_data_out(md): return { "expected": int(md["expected"]) if isinstance(md["expected"], (int, float)) else 0, "original_expected": int(md["original_expected"]) if isinstance(md.get("original_expected"), (int, float)) else 0, "attendance_count": int(md.get("attendance_count", 0)), "exception": md.get("exception"), "paid": float(md["paid"]), "transactions": [_tx_entry_out(t) for t in md.get("transactions", [])], } def _unmatched_tx_out(tx): return { "date": tx.get("date", ""), "amount": float(tx.get("amount", 0)), "person": tx.get("person", ""), "purpose": tx.get("purpose", ""), "sender": tx.get("sender", ""), "message": tx.get("message", ""), "bank_id": tx.get("bank_id", ""), } def capture_reconcile(inp: dict) -> dict: # Convert members from seed format to reconcile() format. # Accepts both the new dict format {"name":..., "tier":..., "fees":{...}} # and the legacy tuple format [name, tier, fees_dict]. members_in = inp["members"] members = [] for m in members_in: if isinstance(m, dict): name, tier, fees_raw = m["name"], m["tier"], m.get("fees", {}) else: name, tier, fees_raw = m[0], m[1], m[2] members.append((name, tier, _member_fee_dict(fees_raw))) exceptions = _build_exceptions(inp.get("exceptions") or []) sorted_months = inp["sorted_months"] transactions = inp["transactions"] result = reconcile(members, sorted_months, transactions, exceptions) members_out = {} for name, mr in result["members"].items(): members_out[name] = { "tier": mr["tier"], "months": {m: _month_data_out(md) for m, md in mr["months"].items()}, "other_transactions": [_other_entry_out(e) for e in mr.get("other_transactions", [])], "total_balance": int(mr["total_balance"]), } return { "members": members_out, "unmatched": [_unmatched_tx_out(tx) for tx in result["unmatched"]], "credits": {k: int(v) for k, v in result["credits"].items()}, } # --------------------------------------------------------------------------- # Dispatcher # --------------------------------------------------------------------------- _DISPATCHERS = { "normalize": capture_normalize, "parse_month_references": capture_parse_month_references, "calculate_fee": capture_calculate_fee, "calculate_junior_fee": capture_calculate_junior_fee, "parse_czk_amount": capture_parse_czk_amount, "generate_sync_id": capture_generate_sync_id, "build_name_variants": capture_build_name_variants, "match_members": capture_match_members, "infer_transaction_details": capture_infer_transaction_details, "format_date": capture_format_date, "reconcile": capture_reconcile, } _FUNC_MODULE = { "normalize": "scripts.czech_utils.normalize", "parse_month_references": "scripts.czech_utils.parse_month_references", "calculate_fee": "scripts.attendance.calculate_fee", "calculate_junior_fee": "scripts.attendance.calculate_junior_fee", "parse_czk_amount": "scripts.infer_payments.parse_czk_amount", "generate_sync_id": "scripts.sync_fio_to_sheets.generate_sync_id", "build_name_variants": "scripts.match_payments._build_name_variants", "match_members": "scripts.match_payments.match_members", "infer_transaction_details": "scripts.match_payments.infer_transaction_details", "format_date": "scripts.match_payments.format_date", "reconcile": "scripts.match_payments.reconcile", } def _emit(func_name: str, case_id: str, inp: dict) -> None: dispatch = _DISPATCHERS[func_name] output = dispatch(inp) doc = { "case": case_id, "func": _FUNC_MODULE[func_name], "captured_at": datetime.date.today().isoformat(), "input": inp, "output": output, } print(json.dumps(doc, ensure_ascii=False)) def _all_seeds(func_name: str): """Yield (case_id, seed) for all seeds of a function.""" for (fn, case_id), seed in seeds.SEEDS.items(): if fn == func_name: yield case_id, seed # Real-data seeds if func_name == "parse_month_references": yield from seeds.real_parse_month_references_seeds() if func_name == "match_members": yield from seeds.real_match_members_seeds() def main() -> None: parser = argparse.ArgumentParser( description="Capture pure-function outputs as JSON fixtures." ) parser.add_argument( "--func", required=True, choices=list(_DISPATCHERS), help="Function to capture." ) group = parser.add_mutually_exclusive_group(required=True) group.add_argument("--case", help="Case ID (file stem). Use with --input-seed or --input-stdin.") group.add_argument("--all", action="store_true", help="Emit all seeds for the function.") parser.add_argument( "--input-seed", metavar="SEED_ID", help="Seed key in _fixture_seeds.SEEDS (required unless --input-stdin or --all).", ) parser.add_argument( "--input-stdin", action="store_true", help="Read input JSON from stdin instead of seed registry.", ) args = parser.parse_args() if args.all: for case_id, seed in _all_seeds(args.func): _emit(args.func, case_id, seed) return # Single case if args.input_stdin: inp = json.load(sys.stdin) elif args.input_seed: key = (args.func, args.input_seed) if key not in seeds.SEEDS: sys.exit(f"Seed ({args.func!r}, {args.input_seed!r}) not found in _fixture_seeds.SEEDS") inp = seeds.SEEDS[key] else: parser.error("Provide --input-seed SEED_ID or --input-stdin.") _emit(args.func, args.case, inp) if __name__ == "__main__": main()