"""Fixture seed registry for capture_fixtures.py. Seeds are keyed by (func_name, case_id). Values are dicts whose keys match the fixture input schema defined in docs/plans/2026-05-06-2111-go-m3-fixture-capture.md. Real-data seeds for parse_month_references and match_members are loaded from tmp/payments_transactions_cache.json and tmp/attendance_regular_cache.json at hardcoded indices selected once interactively for coverage. """ from __future__ import annotations import json import os from typing import Any # --------------------------------------------------------------------------- # Helper to load cache files # --------------------------------------------------------------------------- _REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) def _load_cache(name: str) -> Any: path = os.path.join(_REPO, "tmp", name) if not os.path.exists(path): return None with open(path, encoding="utf-8") as f: return json.load(f) # --------------------------------------------------------------------------- # Handcrafted seed registry # --------------------------------------------------------------------------- SEEDS: dict[tuple[str, str], dict] = {} # --- normalize --- SEEDS[("normalize", "simple_ascii")] = {"text": "hello world"} SEEDS[("normalize", "czech_basic")] = {"text": "štefan čakrtový"} SEEDS[("normalize", "czech_full_set")] = { "text": "áčďéěíňóřšťůúýžÁČĎÉĚÍŇÓŘŠŤŮÚÝŽ" } SEEDS[("normalize", "with_parens")] = {"text": "Pavel Smutný (Štrúdl)"} SEEDS[("normalize", "mixed_case")] = {"text": "Henrietta OTTOVÁ"} SEEDS[("normalize", "empty_string")] = {"text": ""} SEEDS[("normalize", "digits_symbols")] = {"text": "FUJ2026! 
+3"} # --- parse_month_references --- SEEDS[("parse_month_references", "empty_string")] = { "text": "", "default_year": 2026 } SEEDS[("parse_month_references", "single_czech_leden")] = { "text": "leden", "default_year": 2026 } SEEDS[("parse_month_references", "single_czech_prosinec_high_month")] = { "text": "prosinec", "default_year": 2026 } SEEDS[("parse_month_references", "single_czech_rijen_high_month")] = { "text": "říjen", "default_year": 2026 } SEEDS[("parse_month_references", "range_wrap_prosinec_leden")] = { "text": "prosinec-leden", "default_year": 2026 } SEEDS[("parse_month_references", "range_wrap_listopad_leden")] = { "text": "listopad-leden", "default_year": 2026 } SEEDS[("parse_month_references", "range_no_wrap_leden_unor")] = { "text": "leden-únor", "default_year": 2026 } SEEDS[("parse_month_references", "numeric_slash_two_digit_year")] = { "text": "01/26", "default_year": 2026 } SEEDS[("parse_month_references", "numeric_slash_four_digit_year")] = { "text": "1/2026", "default_year": 2026 } SEEDS[("parse_month_references", "numeric_slash_leading_zero")] = { "text": "03/2026", "default_year": 2026 } SEEDS[("parse_month_references", "numeric_plus_multi")] = { "text": "11+12/2025", "default_year": 2026 } SEEDS[("parse_month_references", "numeric_dot_format")] = { "text": "12.2025", "default_year": 2026 } SEEDS[("parse_month_references", "mixed_czech_numeric")] = { "text": "leden+únor+03/2026", "default_year": 2026 } SEEDS[("parse_month_references", "no_month_found")] = { "text": "random text without months", "default_year": 2026 } # --- calculate_fee --- SEEDS[("calculate_fee", "zero_sessions")] = { "attendance_count": 0, "month_key": "2026-01" } SEEDS[("calculate_fee", "one_session")] = { "attendance_count": 1, "month_key": "2026-01" } SEEDS[("calculate_fee", "two_sessions_known_rate")] = { "attendance_count": 2, "month_key": "2026-01" } SEEDS[("calculate_fee", "three_sessions_known_rate")] = { "attendance_count": 3, "month_key": "2026-02" } 
# Remaining calculate_fee cases.
SEEDS.update(
    {
        ("calculate_fee", case_id): {"attendance_count": sessions, "month_key": month}
        for case_id, sessions, month in (
            ("two_sessions_reduced_march", 2, "2026-03"),
            ("two_sessions_default_fallback", 2, "2099-01"),
        )
    }
)

# --- calculate_junior_fee ---
SEEDS.update(
    {
        ("calculate_junior_fee", case_id): {"attendance_count": sessions, "month_key": month}
        for case_id, sessions, month in (
            ("zero_sessions", 0, "2026-01"),
            ("one_session_unknown", 1, "2026-01"),
            ("two_sessions_default", 2, "2026-01"),
            ("two_sessions_reduced_march", 2, "2026-03"),
            ("two_sessions_reduced_sep", 2, "2025-09"),
            ("two_sessions_default_fallback", 2, "2099-06"),
        )
    }
)

# --- parse_czk_amount ---
# Each input is a tagged value: {"type": ..., "value": ...} ("none" carries no value).
SEEDS.update(
    {
        ("parse_czk_amount", "none_value"): {"val": {"type": "none"}},
        ("parse_czk_amount", "empty_string"): {"val": {"type": "string", "value": ""}},
        ("parse_czk_amount", "plain_int"): {"val": {"type": "int", "value": 750}},
        ("parse_czk_amount", "plain_float"): {"val": {"type": "float", "value": 750.0}},
        ("parse_czk_amount", "czech_comma_decimal"): {
            "val": {"type": "string", "value": "1.500,00"}
        },
        ("parse_czk_amount", "czech_comma_no_thousands"): {
            "val": {"type": "string", "value": "750,00"}
        },
        ("parse_czk_amount", "dot_decimal"): {"val": {"type": "string", "value": "1500.00"}},
        ("parse_czk_amount", "dot_thousand_separator"): {
            "val": {"type": "string", "value": "1.500"}
        },
        ("parse_czk_amount", "with_kc_suffix"): {"val": {"type": "string", "value": "750 Kč"}},
        ("parse_czk_amount", "with_czk_suffix"): {"val": {"type": "string", "value": "1500CZK"}},
        ("parse_czk_amount", "space_thousands"): {"val": {"type": "string", "value": "1 500"}},
    }
)

# --- generate_sync_id ---
def _sync_tx(date, amount, currency, sender, vs, message, bank_id):
    """Wrap the given transaction fields in a generate_sync_id input seed."""
    fields = dict(
        date=date,
        amount=amount,
        currency=currency,
        sender=sender,
        vs=vs,
        message=message,
        bank_id=bank_id,
    )
    return {"tx": fields}


SEEDS[("generate_sync_id", "typical_float_amount")] = _sync_tx(
    date="2026-01-15",
    amount={"type": "float", "value": 750.0},
    currency="CZK",
    sender="Test Sender",
    vs="123456",
    message="pausal leden",
    bank_id="100000001",
)
SEEDS[("generate_sync_id", "integer_amount")] = _sync_tx(
    date="2026-01-15",
    amount={"type": "int", "value": 750},
    currency="CZK",
    sender="Test Sender",
    vs="123456",
    message="pausal leden",
    bank_id="100000001",
)
# Built by hand because _sync_tx always emits a "currency" key.
SEEDS[("generate_sync_id", "missing_currency")] = {
    "tx": dict(
        date="2026-02-01",
        amount={"type": "float", "value": 500.0},
        sender="Another Person",
        vs="654321",
        message="trenink",
        bank_id="200000002",
    )
}
SEEDS[("generate_sync_id", "empty_fields")] = _sync_tx(
    date="2026-03-01",
    amount={"type": "float", "value": 0.0},
    currency="CZK",
    sender="",
    vs="",
    message="",
    bank_id="",
)
SEEDS[("generate_sync_id", "large_amount")] = _sync_tx(
    date="2025-10-05",
    amount={"type": "float", "value": 2100.0},
    currency="CZK",
    sender="Payer Name",
    vs="987654",
    message="FUJ treninky",
    bank_id="300000003",
)

# --- build_name_variants ---
SEEDS.update(
    {
        ("build_name_variants", case_id): {"full_name": full_name}
        for case_id, full_name in (
            ("full_name_no_nick", "Jan Novák"),
            ("with_nickname", "František Vrbík (Štrúdl)"),
            ("three_word_name", "Jan Tomášek (Honza)"),
            ("single_word", "Jáchym"),
            ("short_name_filtered", "Jo"),
            ("common_diacritics", "Alžběta Testovická"),
        )
    }
)

# --- match_members ---
# Synthetic roster — deliberately NOT real member names.
# Tomáš Fiktivný has a nickname (Tov) for nickname-match tests.
# Pavel Smutný has a nickname (Štrúdl) for nickname tests.
# Adam Novák: normalized last name "novak" is in _COMMON_SURNAMES → common-surname filter test.
# Shared synthetic roster used by the match_members and
# infer_transaction_details seeds; entries may carry a "(nickname)" suffix.
_ROSTER = [
    "Alžběta Testovická",
    "Tomáš Fiktivný (Tov)",
    "Pavel Smutný (Štrúdl)",
    "Jana Nováková",
    "Adam Novák",
]

SEEDS[("match_members", "exact_full_name")] = {
    "text": "platba od alzbeta testovicka leden",
    "member_names": _ROSTER,
}
SEEDS[("match_members", "first_and_last")] = {
    "text": "jan nový payment tomas fiktivny",
    "member_names": _ROSTER,
}
SEEDS[("match_members", "nickname_match")] = {
    "text": "payment from strudl",
    "member_names": _ROSTER,
}
SEEDS[("match_members", "review_lastname_only")] = {
    "text": "testovicka leden",
    "member_names": _ROSTER,
}
SEEDS[("match_members", "common_surname_no_match")] = {
    "text": "novak leden",
    "member_names": _ROSTER,
}
SEEDS[("match_members", "no_match")] = {
    "text": "xyz platba",
    "member_names": _ROSTER,
}
SEEDS[("match_members", "two_members_exact")] = {
    "text": "pavel smutny a alzbeta testovicka",
    "member_names": _ROSTER,
}

# --- infer_transaction_details ---
# The "date" field is a tagged value ({"type": ..., "value": ...}), so string,
# numeric-serial and missing dates can all be expressed.
SEEDS[("infer_transaction_details", "member_in_message")] = {
    "tx": {
        "sender": "Test Payer",
        "message": "alzbeta testovicka leden 2026",
        "user_id": "",
        "date": {"type": "string", "value": "2026-01-15"},
    },
    "member_names": _ROSTER,
    "default_year": 2026,
}
SEEDS[("infer_transaction_details", "member_in_sender")] = {
    "tx": {
        "sender": "Tomáš Fiktivný",
        "message": "FUJ trenink",
        "user_id": "",
        "date": {"type": "string", "value": "2026-02-01"},
    },
    "member_names": _ROSTER,
    "default_year": 2026,
}
SEEDS[("infer_transaction_details", "month_fallback_from_date")] = {
    "tx": {
        "sender": "Alžběta Testovická",
        "message": "platba",
        "user_id": "",
        "date": {"type": "string", "value": "2026-03-15"},
    },
    "member_names": _ROSTER,
    "default_year": 2026,
}
SEEDS[("infer_transaction_details", "serial_date")] = {
    "tx": {
        "sender": "Jana Nováková",
        "message": "leden",
        "user_id": "",
        # Date given as a Sheets serial number.
        # NOTE(review): originally labelled "2026-01-15", but with the usual
        # Sheets epoch (1899-12-30) serial 46027 is 2026-01-05 (2026-01-15
        # would be 46037) — confirm which date is intended.
        "date": {"type": "float", "value": 46027.0},
    },
    "member_names": _ROSTER,
    "default_year": 2026,
}
SEEDS[("infer_transaction_details", "no_member_no_month")] = {
    "tx": {
        "sender": "Unknown Person",
        "message": "random text",
        "user_id": "",
        "date": {"type": "none"},
    },
    "member_names": _ROSTER,
    "default_year": 2026,
}

# --- format_date ---
SEEDS[("format_date", "string_iso")] = {"val": {"type": "string", "value": "2026-01-15"}}
SEEDS[("format_date", "string_non_iso")] = {"val": {"type": "string", "value": "garbage"}}
SEEDS[("format_date", "empty_string")] = {"val": {"type": "string", "value": ""}}
SEEDS[("format_date", "none_value")] = {"val": {"type": "none"}}
SEEDS[("format_date", "serial_int")] = {"val": {"type": "int", "value": 46027}}
SEEDS[("format_date", "serial_float")] = {"val": {"type": "float", "value": 46027.5}}
# NOTE(review): originally labelled "2025-10-01"; with the 1899-12-30 Sheets
# epoch, serial 45957 is 2025-10-27 (2025-10-01 would be 45931) — confirm.
SEEDS[("format_date", "serial_float_exact")] = {"val": {"type": "float", "value": 45957.0}}

# ---------------------------------------------------------------------------
# Reconcile handcrafted seeds
# ---------------------------------------------------------------------------


def _tx(date, amount, person, purpose, sender="Payer", message="", bank_id="",
        inferred_amount=None):
    """Build one transaction dict for a reconcile seed.

    ``manual_fix`` is always the empty string. ``inferred_amount`` falls back
    to ``amount`` when it is None (an explicit 0 is kept as 0).
    """
    return {
        "date": date,
        "amount": amount,
        "manual_fix": "",
        "person": person,
        "purpose": purpose,
        "inferred_amount": inferred_amount if inferred_amount is not None else amount,
        "sender": sender,
        "message": message,
        "bank_id": bank_id,
    }


def _member(name, tier, fees: dict):
    """Build one member dict for a reconcile seed.

    fees: {month: (fee, count) or int}.

    Returns a dict so the scrubber can find the 'name' key and apply
    deterministic pseudonymisation.
    """
    return {"name": name, "tier": tier, "fees": fees}


def _reconcile_seed(members, sorted_months, transactions, exceptions=None,
                    default_year=2026):
    """Assemble the full input dict for one reconcile fixture case.

    A falsy ``exceptions`` (None or an empty list) is stored as a fresh
    empty list.
    """
    return {
        "members": members,
        "sorted_months": sorted_months,
        "transactions": transactions,
        "exceptions": exceptions or [],
        "default_year": default_year,
    }


# 01 — greedy exact: Alice pays exactly 750, expected 750
SEEDS[("reconcile", "01_greedy_exact")] = _reconcile_seed(
    members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})],
    sorted_months=["2026-01"],
    transactions=[_tx("2026-01-20", 750, "Alice Dvořák", "2026-01", sender="Alice Dvořák")],
)

# 02 — greedy overpayment → credit: Alice pays 900, expected 750
SEEDS[("reconcile", "02_greedy_overpayment")] = _reconcile_seed(
    members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})],
    sorted_months=["2026-01"],
    transactions=[_tx("2026-01-20", 900, "Alice Dvořák", "2026-01", sender="Alice Dvořák")],
)

# 03 — proportional: Alice pays 800 for 3 months (750+750+350=1850 expected)
SEEDS[("reconcile", "03_proportional_remainder")] = _reconcile_seed(
    members=[_member("Alice Dvořák", "A", {
        "2026-01": (750, 3),
        "2026-02": (750, 2),
        "2026-03": (350, 2),
    })],
    sorted_months=["2026-01", "2026-02", "2026-03"],
    transactions=[_tx("2026-03-10", 800, "Alice Dvořák", "2026-01,2026-02,2026-03", sender="Alice Dvořák")],
)

# 04 — even-split: all expected=0, payment spread evenly
SEEDS[("reconcile", "04_even_split_prepayment")] = _reconcile_seed(
    members=[_member("Bob Kratochvíl", "A", {
        "2026-04": (0, 0),
        "2026-05": (0, 0),
    })],
    sorted_months=["2026-04", "2026-05"],
    transactions=[_tx("2026-03-25", 700, "Bob Kratochvíl", "2026-04,2026-05", sender="Bob Kratochvíl")],
)

# 05 — out-of-window: payment references 2025-08 which is outside sorted_months
SEEDS[("reconcile", "05_out_of_window_credit")] = _reconcile_seed(
    members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})],
    sorted_months=["2026-01"],
    transactions=[_tx("2026-01-20", 1500, "Alice Dvořák", "2026-01,2025-08", sender="Alice Dvořák")],
)

# 06 — exception override: Alice's 2026-01 fee overridden from 750 to 300
SEEDS[("reconcile", "06_exception_override")] = _reconcile_seed(
    members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})],
    sorted_months=["2026-01"],
    transactions=[_tx("2026-01-20", 300, "Alice Dvořák", "2026-01", sender="Alice Dvořák")],
    # Each exception here is a dict with name / period / amount / note keys.
    # Per the original note, capture_fixtures converts these into a dict
    # before use — that conversion is not visible in this file.
    exceptions=[{"name": "Alice Dvořák", "period": "2026-01", "amount": 300, "note": "injury discount"}],
)

# 07 — other purpose: tournament fee split between Alice and Bob
SEEDS[("reconcile", "07_other_purpose_split")] = _reconcile_seed(
    members=[
        _member("Alice Dvořák", "A", {"2026-01": (750, 3)}),
        _member("Bob Kratochvíl", "A", {"2026-01": (750, 2)}),
    ],
    sorted_months=["2026-01"],
    transactions=[_tx("2026-01-10", 800, "Alice Dvořák, Bob Kratochvíl", "other:tournament", sender="Alice Dvořák")],
)

# 08 — junior with attendance=1 (expected=0 in reconcile, unknown in UI)
SEEDS[("reconcile", "08_junior_question_mark")] = _reconcile_seed(
    members=[_member("Karel Junior", "A", {"2026-01": (0, 1)})],
    sorted_months=["2026-01"],
    transactions=[_tx("2026-01-20", 200, "Karel Junior", "2026-01", sender="Karel Junior")],
)

# 09 — multi-person comma-split: Alice and Bob share a payment for 2 months
SEEDS[("reconcile", "09_multiperson_multimonth")] = _reconcile_seed(
    members=[
        _member("Alice Dvořák", "A", {"2026-01": (750, 3), "2026-02": (750, 2)}),
        _member("Bob Kratochvíl", "A", {"2026-01": (750, 2), "2026-02": (350, 2)}),
    ],
    sorted_months=["2026-01", "2026-02"],
    transactions=[_tx("2026-02-15", 2000, "Alice Dvořák, Bob Kratochvíl", "2026-01,2026-02", sender="Alice Dvořák")],
)

# 10 — unmatched: no person, garbage message
SEEDS[("reconcile", "10_unmatched")] = _reconcile_seed(
    members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})],
    sorted_months=["2026-01"],
    transactions=[_tx("2026-01-20", 500, "", "", sender="Unknown Payer", message="garbage xyz 999")],
)

# ---------------------------------------------------------------------------
# Real-data seeds
# ---------------------------------------------------------------------------

# (index, case_id) pairs; each index points into
# tmp/payments_transactions_cache.json['data'] and was selected for coverage.
# DO NOT change these — they are deliberately frozen to make re-runs deterministic.
_REAL_PMR_INDICES = [
    (16, "real_single_leden"),
    (17, "real_range_prosinec_leden"),
    (18, "real_list_prosinec_leden_unor"),
    (22, "real_martin_prosinec_leden"),
    (23, "real_range_listopad_leden"),
    (25, "real_filip_prosinec_leden_unor"),
    (36, "real_mixed_czech_numeric"),
    (42, "real_dominika_numeric_multi"),
    # index 67 removed: the name-sweep scrubber changes the text prefix in a way
    # that breaks the numeric-slash parser (empty result vs expected "2026-03").
    (72, "real_tomik_numeric_plus"),
    (73, "real_franc_numeric_space"),
    (74, "real_jana_numeric_multi"),
    (80, "real_alex_numeric_long"),
    (89, "real_emily_numeric_long"),
    (90, "real_jachym_numeric_multi"),
]

# Real match_members seeds are intentionally omitted: after PII scrubbing
# the member_names pseudonyms are inconsistent with the (un-scrubbed) text,
# causing all Go parity assertions to fail. The synthetic match_members seeds
# defined earlier in this file cover the same code paths without any real data.
# (index, case_id) pairs for real match_members seeds; deliberately empty, so
# real_match_members_seeds() currently yields nothing.
_REAL_MM_INDICES: list = []


def _as_text(value: Any) -> str:
    """Return *value* as a string, mapping None (e.g. JSON null) to ""."""
    return "" if value is None else str(value)


def real_parse_month_references_seeds(default_year: int = 2026):
    """Yield (case_id, seed) from real cache messages.

    Each seed is ``{"text": <message>, "default_year": default_year}``, where
    the message comes from the cached transaction at a frozen index in
    ``_REAL_PMR_INDICES``. Yields nothing when the cache file is missing;
    indices past the end of the cached data are skipped.
    """
    cache = _load_cache("payments_transactions_cache.json")
    if cache is None:
        return
    txs = cache.get("data", [])
    for idx, case_id in _REAL_PMR_INDICES:
        if idx >= len(txs):
            continue
        # A null message must become "", not the literal text "None".
        msg = _as_text(txs[idx].get("message"))
        yield case_id, {"text": msg, "default_year": default_year}


def _real_member_names():
    """Return canonical member names from the regular attendance cache.

    Returns an empty list when the cache file is missing. When the first
    element of ``data`` is itself a list, that element is used as the row
    collection. The name is the first cell of every row that is a list or
    tuple with at least two cells.
    """
    cache = _load_cache("attendance_regular_cache.json")
    if cache is None:
        return []
    rows = cache.get("data", [])
    # NOTE(review): presumably "data" wraps the row list in an outer list —
    # confirm against the cache writer.
    if rows and isinstance(rows[0], list):
        rows = rows[0]
    return [row[0] for row in rows if isinstance(row, (list, tuple)) and len(row) >= 2]


def real_match_members_seeds():
    """Yield (case_id, seed) using real senders/messages against real roster.

    Each seed is ``{"text": "<sender> <message>", "member_names": <roster>}``.
    Yields nothing when the transactions cache is missing or the roster is
    empty; indices past the end of the cached data are skipped.
    """
    cache = _load_cache("payments_transactions_cache.json")
    member_names = _real_member_names()
    if cache is None or not member_names:
        return
    txs = cache.get("data", [])
    for idx, case_id in _REAL_MM_INDICES:
        if idx >= len(txs):
            continue
        tx = txs[idx]
        # Null sender/message must not leak into the text as "None".
        sender = _as_text(tx.get("sender"))
        message = _as_text(tx.get("message"))
        yield case_id, {"text": f"{sender} {message}", "member_names": member_names}