feat(go): fixture capture + characterization framework (M3)
All checks were successful
Deploy to K8s / deploy (push) Successful in 7s
All checks were successful
Deploy to K8s / deploy (push) Successful in 7s
Closes M3.1–M3.6. Parity safety net proving Go output matches Python
for every ported pure-domain function (M2.1–M2.9) and reconcile (M2.10).
Capture pipeline:
- scripts/capture_fixtures.py: calls each Python function with seeded
inputs, emits JSON fixtures to stdout (never writes files directly).
- scripts/scrub_fixtures.py: deterministic PII scrubber — SHA-256
pseudonyms for member names, digit-preserving hashes for VS/account/
bank_id, name-sweep in message text. Idempotent; no salt.
- scripts/_fixture_seeds.py: handcrafted seeds for all 11 functions;
synthetic names throughout (no real roster members).
- scripts/capture_all_fixtures.sh: convenience wrapper for full corpus
regeneration outside of make.
Fixture corpus (98 files, all PII-free):
- go/tests/fixtures/pure/<func>/<case>.json — 10 function directories.
- go/tests/fixtures/reconcile/<NN>_<case>.json — 10 branch-coverage
cases: greedy, overpayment credit, proportional remainder, even-split,
out-of-window, exception override, other: purpose, junior ?, multi-
person+month fan-out, unmatched.
Go parity tests (//go:build parity):
- go/tests/parity/parityio.go: generic LoadDir/RunAll helpers + typed
In/Out struct pairs for all 10 pure functions; Envelope decoder for
int/float/none disambiguation.
- 10 pure-function test packages + bespoke reconcile test with per-cell
float tolerance (math.Abs <= 0.01 for `paid` values).
Makefile: go-parity, go-test-all, capture-fixtures targets.
go/tests/fixtures/README.md: refresh workflow + PII audit guide.
Gate: make go-test green, make go-parity green (11/11 packages),
make go-lint clean (parity tag), make go-build clean.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
353
scripts/capture_fixtures.py
Normal file
353
scripts/capture_fixtures.py
Normal file
@@ -0,0 +1,353 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Capture pure-function output as JSON fixtures for parity testing.
|
||||
|
||||
Each invocation emits exactly one JSON object to stdout.
|
||||
Pipe through scrub_fixtures.py before writing to go/tests/fixtures/.
|
||||
|
||||
Usage:
|
||||
# Single case:
|
||||
python capture_fixtures.py --func normalize --case simple_ascii \\
|
||||
--input-seed simple_ascii | python scrub_fixtures.py \\
|
||||
> go/tests/fixtures/pure/normalize/simple_ascii.json
|
||||
|
||||
# All seeds for a function (newline-delimited JSON, one object per line):
|
||||
python capture_fixtures.py --func normalize --all
|
||||
|
||||
# Feed input from stdin (for ad-hoc cases):
|
||||
echo '{"text":"hello"}' | python capture_fixtures.py --func normalize \\
|
||||
--case adhoc --input-stdin
|
||||
|
||||
See scripts/_fixture_seeds.py for the seed registry.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import datetime
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from czech_utils import normalize, parse_month_references
|
||||
from attendance import calculate_fee, calculate_junior_fee
|
||||
from infer_payments import parse_czk_amount
|
||||
from sync_fio_to_sheets import generate_sync_id as _py_generate_sync_id
|
||||
from match_payments import (
|
||||
_build_name_variants,
|
||||
match_members,
|
||||
infer_transaction_details,
|
||||
format_date,
|
||||
reconcile,
|
||||
)
|
||||
from czech_utils import normalize as _norm
|
||||
|
||||
import _fixture_seeds as seeds
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Type-envelope helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _decode_envelope(envelope):
|
||||
"""Convert a {type, value} envelope to a Python value for function calls."""
|
||||
if not isinstance(envelope, dict):
|
||||
return envelope
|
||||
t = envelope.get("type", "raw")
|
||||
v = envelope.get("value")
|
||||
if t == "none":
|
||||
return None
|
||||
if t == "int":
|
||||
return int(v)
|
||||
if t == "float":
|
||||
return float(v)
|
||||
if t == "string":
|
||||
return v
|
||||
return v # raw JSON value (for fields that don't use an envelope)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-function capture implementations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def capture_normalize(inp: dict) -> dict:
    """Call ``normalize`` on ``inp["text"]`` and return it under ``"text"``."""
    return {"text": normalize(inp["text"])}
|
||||
|
||||
|
||||
def capture_parse_month_references(inp: dict) -> dict:
    """Call ``parse_month_references`` and return its result under ``"months"``.

    ``inp["text"]`` is required; ``inp["default_year"]`` falls back to 2026
    when absent.
    """
    text = inp["text"]
    year = inp.get("default_year", 2026)
    return {"months": parse_month_references(text, year)}
|
||||
|
||||
|
||||
def capture_calculate_fee(inp: dict) -> dict:
    """Call ``calculate_fee`` with the seeded count and month key.

    The result is returned under the ``"fee"`` key.
    """
    count = inp["attendance_count"]
    month = inp["month_key"]
    return {"fee": calculate_fee(count, month)}
|
||||
|
||||
|
||||
def capture_calculate_junior_fee(inp: dict) -> dict:
    """Call ``calculate_junior_fee`` and encode its result for JSON.

    A ``"?"`` result is emitted as ``{"value": 0, "unknown": True}``; any
    other result is cast to ``int`` and flagged ``"unknown": False``.
    """
    fee = calculate_junior_fee(inp["attendance_count"], inp["month_key"])
    if fee != "?":
        return {"value": int(fee), "unknown": False}
    return {"value": 0, "unknown": True}
|
||||
|
||||
|
||||
def capture_parse_czk_amount(inp: dict) -> dict:
    """Decode the ``"val"`` envelope, parse it, and return a float ``"amount"``."""
    decoded = _decode_envelope(inp["val"])
    return {"amount": float(parse_czk_amount(decoded))}
|
||||
|
||||
|
||||
def capture_generate_sync_id(inp: dict) -> dict:
    """Call ``generate_sync_id`` on the seeded transaction.

    The seed stores ``amount`` as a type envelope; it is decoded back to the
    Python-native type so that ``str(amount)`` is replicated faithfully. The
    result is returned under ``"sync_id"``.
    """
    # Work on a copy so the seed itself is never mutated. Popping and then
    # re-adding "amount" leaves it as the last key, after all other fields.
    tx = dict(inp["tx"])
    tx["amount"] = _decode_envelope(tx.pop("amount"))
    return {"sync_id": _py_generate_sync_id(tx)}
|
||||
|
||||
|
||||
def capture_build_name_variants(inp: dict) -> dict:
    """Call ``_build_name_variants`` on ``inp["full_name"]``.

    The result is returned under the ``"variants"`` key.
    """
    return {"variants": _build_name_variants(inp["full_name"])}
|
||||
|
||||
|
||||
def capture_match_members(inp: dict) -> dict:
    """Call ``match_members`` and return its pairs as JSON-friendly dicts.

    Each ``(name, confidence)`` pair becomes
    ``{"name": ..., "confidence": ...}`` inside the ``"matches"`` list.
    """
    rows = []
    for member, confidence in match_members(inp["text"], inp["member_names"]):
        rows.append({"name": member, "confidence": confidence})
    return {"matches": rows}
|
||||
|
||||
|
||||
def capture_infer_transaction_details(inp: dict) -> dict:
    """Call ``infer_transaction_details`` and flatten its result.

    The transaction's ``date`` field is decoded from its type envelope on a
    copy of the seed before the call. The output carries the member matches
    as ``{"name", "confidence"}`` dicts, the months, and a ``"search_text"``
    taken from the result's ``search_text`` key, falling back to
    ``matched_text`` and then to an empty string.
    """
    source_tx = inp["tx"]
    tx = dict(source_tx)
    tx["date"] = _decode_envelope(source_tx.get("date"))

    details = infer_transaction_details(tx, inp["member_names"])

    matches = []
    for member, confidence in details["members"]:
        matches.append({"name": member, "confidence": confidence})
    months = details["months"]
    fallback_text = details.get("matched_text", "")

    return {
        "matches": matches,
        "months": months,
        "search_text": details.get("search_text", fallback_text),
    }
|
||||
|
||||
|
||||
def capture_format_date(inp: dict) -> dict:
    """Decode the ``"val"`` envelope and return ``format_date`` of it as ``"date"``."""
    decoded = _decode_envelope(inp["val"])
    return {"date": format_date(decoded)}
|
||||
|
||||
|
||||
def _build_exceptions(exc_list):
|
||||
"""Convert seed exceptions to the dict reconcile() expects.
|
||||
Accepts both the legacy list format [name, period, amount, note] and the
|
||||
new dict format {"name": ..., "period": ..., "amount": ..., "note": ...}."""
|
||||
if not exc_list:
|
||||
return {}
|
||||
result = {}
|
||||
for row in exc_list:
|
||||
if isinstance(row, dict):
|
||||
name = row.get("name", "")
|
||||
period = row.get("period", "")
|
||||
amount = row.get("amount", 0)
|
||||
note = row.get("note", "")
|
||||
else:
|
||||
name, period, amount = row[0], row[1], row[2]
|
||||
note = row[3] if len(row) > 3 else ""
|
||||
result[(_norm(name), _norm(period))] = {"amount": int(amount), "note": note}
|
||||
return result
|
||||
|
||||
|
||||
def _member_fee_dict(fees_raw: dict) -> dict:
|
||||
"""Convert seed fees dict to the form reconcile() expects."""
|
||||
# Seeds store fees as [fee, count] lists (JSON) or (fee, count) tuples.
|
||||
result = {}
|
||||
for month, v in fees_raw.items():
|
||||
if isinstance(v, (list, tuple)) and len(v) == 2:
|
||||
result[month] = (int(v[0]), int(v[1]))
|
||||
else:
|
||||
result[month] = int(v)
|
||||
return result
|
||||
|
||||
|
||||
def _tx_entry_out(tx):
|
||||
"""Convert a reconcile output TxEntry dict to a serializable form."""
|
||||
return {
|
||||
"amount": float(tx.get("amount", 0)),
|
||||
"date": tx.get("date", ""),
|
||||
"sender": tx.get("sender", ""),
|
||||
"message": tx.get("message", ""),
|
||||
"confidence": tx.get("confidence", ""),
|
||||
}
|
||||
|
||||
|
||||
def _other_entry_out(e):
|
||||
return {
|
||||
"amount": float(e.get("amount", 0)),
|
||||
"date": e.get("date", ""),
|
||||
"sender": e.get("sender", ""),
|
||||
"message": e.get("message", ""),
|
||||
"purpose": e.get("purpose", ""),
|
||||
"confidence": e.get("confidence", ""),
|
||||
}
|
||||
|
||||
|
||||
def _month_data_out(md):
|
||||
return {
|
||||
"expected": int(md["expected"]) if isinstance(md["expected"], (int, float)) else 0,
|
||||
"original_expected": int(md["original_expected"]) if isinstance(md.get("original_expected"), (int, float)) else 0,
|
||||
"attendance_count": int(md.get("attendance_count", 0)),
|
||||
"exception": md.get("exception"),
|
||||
"paid": float(md["paid"]),
|
||||
"transactions": [_tx_entry_out(t) for t in md.get("transactions", [])],
|
||||
}
|
||||
|
||||
|
||||
def _unmatched_tx_out(tx):
|
||||
return {
|
||||
"date": tx.get("date", ""),
|
||||
"amount": float(tx.get("amount", 0)),
|
||||
"person": tx.get("person", ""),
|
||||
"purpose": tx.get("purpose", ""),
|
||||
"sender": tx.get("sender", ""),
|
||||
"message": tx.get("message", ""),
|
||||
"bank_id": tx.get("bank_id", ""),
|
||||
}
|
||||
|
||||
|
||||
def capture_reconcile(inp: dict) -> dict:
    """Run ``reconcile()`` on a seed and serialize its result.

    Members may be given as dicts (``{"name", "tier", "fees"}``, with
    ``fees`` optional) or as legacy ``[name, tier, fees_dict]`` sequences;
    both are converted to ``(name, tier, fees)`` tuples with integer fees.
    ``exceptions`` is optional. ``sorted_months`` and ``transactions`` are
    passed through unchanged.

    Returns a dict with ``"members"`` (per-member tier, months, other
    transactions and integer total balance), ``"unmatched"`` and
    ``"credits"`` (values cast to ``int``).
    """

    def _as_triple(member):
        # Normalize either seed shape to the tuple form reconcile() receives.
        if isinstance(member, dict):
            name, tier = member["name"], member["tier"]
            fees = member.get("fees", {})
        else:
            name, tier, fees = member[0], member[1], member[2]
        return (name, tier, _member_fee_dict(fees))

    roster = [_as_triple(member) for member in inp["members"]]
    overrides = _build_exceptions(inp.get("exceptions") or [])

    outcome = reconcile(roster, inp["sorted_months"], inp["transactions"], overrides)

    members_out = {
        name: {
            "tier": summary["tier"],
            "months": {
                month: _month_data_out(data)
                for month, data in summary["months"].items()
            },
            "other_transactions": [
                _other_entry_out(entry)
                for entry in summary.get("other_transactions", [])
            ],
            "total_balance": int(summary["total_balance"]),
        }
        for name, summary in outcome["members"].items()
    }

    unmatched_out = []
    for tx in outcome["unmatched"]:
        unmatched_out.append(_unmatched_tx_out(tx))

    credits_out = {key: int(value) for key, value in outcome["credits"].items()}

    return {
        "members": members_out,
        "unmatched": unmatched_out,
        "credits": credits_out,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps each --func name to the capture function that runs it. The key order
# also determines the order of the CLI's --func choices.
_DISPATCHERS = {
    "normalize": capture_normalize,
    "parse_month_references": capture_parse_month_references,
    "calculate_fee": capture_calculate_fee,
    "calculate_junior_fee": capture_calculate_junior_fee,
    "parse_czk_amount": capture_parse_czk_amount,
    "generate_sync_id": capture_generate_sync_id,
    "build_name_variants": capture_build_name_variants,
    "match_members": capture_match_members,
    "infer_transaction_details": capture_infer_transaction_details,
    "format_date": capture_format_date,
    "reconcile": capture_reconcile,
}
|
||||
|
||||
_FUNC_MODULE = {
|
||||
"normalize": "scripts.czech_utils.normalize",
|
||||
"parse_month_references": "scripts.czech_utils.parse_month_references",
|
||||
"calculate_fee": "scripts.attendance.calculate_fee",
|
||||
"calculate_junior_fee": "scripts.attendance.calculate_junior_fee",
|
||||
"parse_czk_amount": "scripts.infer_payments.parse_czk_amount",
|
||||
"generate_sync_id": "scripts.sync_fio_to_sheets.generate_sync_id",
|
||||
"build_name_variants": "scripts.match_payments._build_name_variants",
|
||||
"match_members": "scripts.match_payments.match_members",
|
||||
"infer_transaction_details": "scripts.match_payments.infer_transaction_details",
|
||||
"format_date": "scripts.match_payments.format_date",
|
||||
"reconcile": "scripts.match_payments.reconcile",
|
||||
}
|
||||
|
||||
|
||||
def _emit(func_name: str, case_id: str, inp: dict) -> None:
    """Run one capture and print its fixture document as a single JSON line.

    The document records the case id, the dotted path of the captured
    function, today's date, the input exactly as given, and the captured
    output. Non-ASCII characters are written unescaped.
    """
    captured = _DISPATCHERS[func_name](inp)
    record = {
        "case": case_id,
        "func": _FUNC_MODULE[func_name],
        "captured_at": datetime.date.today().isoformat(),
        "input": inp,
        "output": captured,
    }
    line = json.dumps(record, ensure_ascii=False)
    print(line)
|
||||
|
||||
|
||||
def _all_seeds(func_name: str):
    """Yield ``(case_id, seed)`` pairs for every seed of ``func_name``.

    Entries of ``seeds.SEEDS`` registered for the function come first. For
    ``parse_month_references`` and ``match_members``, the pairs produced by
    the matching ``seeds.real_*_seeds()`` helper follow.
    """
    yield from (
        (case_id, seed)
        for (owner, case_id), seed in seeds.SEEDS.items()
        if owner == func_name
    )

    # Real-data seeds
    if func_name == "parse_month_references":
        yield from seeds.real_parse_month_references_seeds()
    elif func_name == "match_members":
        yield from seeds.real_match_members_seeds()
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line and emit the requested fixture(s) to stdout.

    With ``--all``, every seed of ``--func`` is emitted, one JSON object per
    line. Otherwise a single ``--case`` is emitted, with its input read from
    stdin (``--input-stdin``) or looked up in ``_fixture_seeds.SEEDS``
    (``--input-seed``); stdin takes precedence when both are given. The
    process exits with a message when the seed is unknown, and with a usage
    error when no input source is given.
    """
    parser = argparse.ArgumentParser(
        description="Capture pure-function outputs as JSON fixtures."
    )
    parser.add_argument(
        "--func",
        required=True,
        choices=list(_DISPATCHERS),
        help="Function to capture.",
    )

    # Exactly one of --case / --all must be supplied.
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--case",
        help="Case ID (file stem). Use with --input-seed or --input-stdin.",
    )
    mode.add_argument(
        "--all",
        action="store_true",
        help="Emit all seeds for the function.",
    )

    parser.add_argument(
        "--input-seed",
        metavar="SEED_ID",
        help="Seed key in _fixture_seeds.SEEDS (required unless --input-stdin or --all).",
    )
    parser.add_argument(
        "--input-stdin",
        action="store_true",
        help="Read input JSON from stdin instead of seed registry.",
    )
    args = parser.parse_args()

    if args.all:
        for case_id, seed in _all_seeds(args.func):
            _emit(args.func, case_id, seed)
        return

    # Single case
    if not (args.input_stdin or args.input_seed):
        parser.error("Provide --input-seed SEED_ID or --input-stdin.")

    if args.input_stdin:
        payload = json.load(sys.stdin)
    else:
        seed_key = (args.func, args.input_seed)
        if seed_key not in seeds.SEEDS:
            sys.exit(f"Seed ({args.func!r}, {args.input_seed!r}) not found in _fixture_seeds.SEEDS")
        payload = seeds.SEEDS[seed_key]

    _emit(args.func, args.case, payload)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user