diff --git a/CHANGELOG.md b/CHANGELOG.md index f4e4d5e..fff61fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## 2026-05-06 23:25 CEST — feat(go/M3): fixture capture + parity test framework + +- `scripts/capture_fixtures.py`: dispatcher CLI that calls each ported function with seeded inputs and emits captured output as JSON fixtures. +- `scripts/scrub_fixtures.py`: deterministic PII scrubber (SHA-256 pseudonyms, digit-preserving account/VS hashes, name-sweep in free text). +- `scripts/_fixture_seeds.py`: handcrafted seed registry for all 10 pure functions + 10 reconcile branch-coverage cases. +- 98 fixture files committed under `go/tests/fixtures/pure//` and `go/tests/fixtures/reconcile/`; all PII-free. +- `go/tests/parity/parityio.go`: shared loader with generic `LoadDir`/`RunAll` helpers and typed `In`/`Out` structs for all 10 functions. +- 11 parity test packages under `//go:build parity`: 10 pure-function tests + bespoke reconcile test with per-cell float tolerance. +- Makefile: `go-parity`, `go-test-all`, `capture-fixtures` targets. +- `go/tests/fixtures/README.md`: refresh workflow, PII audit guide, adding-a-fixture steps. + ## 2026-05-06 17:49 CEST — feat(go/M2.11-12): wire fuj fees + fuj reconcile subcommands - New `go/internal/services/membership` package: `AttendanceLoader`, `TransactionLoader`, `ExceptionLoader` interfaces, a stub (`NewStubSources`) that returns `ErrIOPending`, and `FeesReport` / `ReconcileReport` orchestration functions backed by real `domain/fees` + `domain/reconcile` logic. 
diff --git a/Makefile b/Makefile
index 21718d5..4c521d4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: help fees match web web-py web-debug web-go go-build go-test go-run go-lint image run sync sync-2026 test test-v docs
+.PHONY: help fees match web web-py web-debug web-go go-build go-test go-test-all go-parity go-run go-lint capture-fixtures image run sync sync-2026 test test-v docs
 
 export PYTHONPATH := scripts:$(PYTHONPATH)
 VENV := .venv
@@ -23,8 +23,11 @@ help:
 	@echo " make web-go - Build and start Go dashboard on :8080"
 	@echo " make web-debug - Start Python dashboard in debug mode"
 	@echo " make go-build - Build Go binary to bin/fuj"
-	@echo " make go-test - Run Go tests"
+	@echo " make go-test - Run Go unit tests"
+	@echo " make go-parity - Run Go parity tests (requires -tags=parity fixture corpus)"
+	@echo " make go-test-all - Run both unit and parity tests"
 	@echo " make go-lint - Run golangci-lint on Go code"
+	@echo " make capture-fixtures - Regenerate parity fixture corpus from live Python"
 	@echo " make image - Build Python OCI container image"
 	@echo " make run - Run the built Python Docker image locally"
 	@echo " make sync - Sync Fio transactions to Google Sheets"
@@ -64,6 +67,40 @@ go-build:
 go-test:
 	cd $(GO_SRC) && go test -race ./...
 
+go-parity:
+	cd $(GO_SRC) && go test -tags=parity ./tests/parity/...
+
+go-test-all: go-test go-parity
+
+# Regenerate the parity fixture corpus. For every registered function, run
+# capture_fixtures.py --all (one JSON document per line) and pipe each document
+# through scrub_fixtures.py into <fixture dir>/<case>.json.
+# Written for POSIX sh, make's default shell: `case` is used instead of the
+# bash-only `[[ ]]` test (which fails under shells such as dash and would send
+# reconcile fixtures to pure/reconcile/), and printf is used instead of echo,
+# whose treatment of the backslashes in JSON escapes differs between shells.
+# `set -e` plus holding the capture output in a variable makes a failing
+# capture or scrub step abort the target instead of reporting success.
+capture-fixtures: $(PYTHON)
+	@echo "Capturing and scrubbing fixtures for all registered functions..."
+	@set -e; \
+	for func in normalize parse_month_references calculate_fee calculate_junior_fee \
+	    parse_czk_amount generate_sync_id build_name_variants match_members \
+	    infer_transaction_details format_date reconcile; do \
+	  case "$$func" in \
+	    reconcile) dir="go/tests/fixtures/reconcile" ;; \
+	    *) dir="go/tests/fixtures/pure/$$func" ;; \
+	  esac; \
+	  mkdir -p "$$dir"; \
+	  captured=$$(PYTHONPATH=scripts:. $(PYTHON) scripts/capture_fixtures.py --func "$$func" --all); \
+	  printf '%s\n' "$$captured" | while IFS= read -r line; do \
+	    [ -n "$$line" ] || continue; \
+	    case_id=$$(printf '%s\n' "$$line" | $(PYTHON) -c "import sys,json; print(json.load(sys.stdin)['case'])"); \
+	    printf '%s\n' "$$line" | $(PYTHON) scripts/scrub_fixtures.py > "$$dir/$${case_id}.json"; \
+	  done; \
+	  echo " $$func done"; \
+	done
+	@echo "capture-fixtures complete."
+
 go-run: go-build
 	./$(GO_BIN) $(ARGS)
 
diff --git a/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md b/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md
index 3e3f667..c4ea7b2 100644
--- a/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md
+++ b/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md
@@ -2,7 +2,7 @@
 
 Companion to [2026-05-03-2349-go-backend-rewrite.md](2026-05-03-2349-go-backend-rewrite.md).
 
-**Current milestone:** M2 — Pure-domain helpers
+**Current milestone:** M3 — Fixture capture + characterization framework ✅
 **Started:** 2026-05-04
 **Last updated:** 2026-05-06
 
@@ -65,14 +65,14 @@ Each task: port the function, write Go unit tests for fresh cases, hook into the
 
 Goal: deterministic, PII-free fixture corpus that drives parity tests. Runs in parallel with M2 (M3.1/M3.2 unblocks M2.1).
 
-- [ ] **M3.1** `scripts/capture_fixtures.py` — pure-function output dumper. Reads inputs from stdin / argv, prints `{"input":..., "output":...}` JSON
-- [ ] **M3.2** `scripts/scrub_fixtures.py` — replaces names with `Member_<8hex>` (deterministic per name); scrambles sender/account/VS/bank_id with stable bijection; preserves dates, amounts, exception keys
-- [ ] **M3.3** Capture pure-fn fixtures for M2.1–M2.9 (run helper + scrubber, commit to `tests/fixtures/pure//.json`)
-- [ ] **M3.4** Capture ~10 reconcile fixtures spanning every code path: greedy, proportional (float remainder), even-split, out-of-window credit, exception override, `other:` purpose, junior `"?"`, multi-person comma-split, multi-month range, unmatched. 
Commit to `tests/fixtures/reconcile/` -- [ ] **M3.5** Hook fixtures into Tier-1 test runner with `-tags=parity` build constraint -- [ ] **M3.6** Document fixture-refresh workflow in `tests/fixtures/README.md` (what to do when sheet schema changes) +- [x] **M3.1** `scripts/capture_fixtures.py` — pure-function output dumper. Reads inputs from stdin / argv, prints `{"input":..., "output":...}` JSON +- [x] **M3.2** `scripts/scrub_fixtures.py` — replaces names with `Member_<8hex>` (deterministic per name); scrambles sender/account/VS/bank_id with stable bijection; preserves dates, amounts, exception keys +- [x] **M3.3** Capture pure-fn fixtures for M2.1–M2.9 (run helper + scrubber, commit to `tests/fixtures/pure//.json`) +- [x] **M3.4** Capture ~10 reconcile fixtures spanning every code path: greedy, proportional (float remainder), even-split, out-of-window credit, exception override, `other:` purpose, junior `"?"`, multi-person comma-split, multi-month range, unmatched. Commit to `tests/fixtures/reconcile/` +- [x] **M3.5** Hook fixtures into Tier-1 test runner with `-tags=parity` build constraint +- [x] **M3.6** Document fixture-refresh workflow in `tests/fixtures/README.md` (what to do when sheet schema changes) -**Gate:** `tests/fixtures/` populated; M2 parity tests green; raw `tmp/*.json` confirmed gitignored. +**Gate:** ✅ `tests/fixtures/` populated (98 files); `make go-parity` green; `make go-lint` (parity tag) clean; raw `tmp/*.json` confirmed gitignored. --- diff --git a/docs/plans/2026-05-06-2111-go-m3-fixture-capture.md b/docs/plans/2026-05-06-2111-go-m3-fixture-capture.md new file mode 100644 index 0000000..f88eabc --- /dev/null +++ b/docs/plans/2026-05-06-2111-go-m3-fixture-capture.md @@ -0,0 +1,261 @@ +# M3 — Fixture capture + characterization framework + +> On approval: copy this plan to `docs/plans/2026-05-06-2111-go-m3-fixture-capture.md` per [CLAUDE.md](../../srv/personal/fuj-management/CLAUDE.md) plan-location convention. 
+ +## Context + +The Go rewrite (tracked in [docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md](../../srv/personal/fuj-management/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md)) finished M2.1–M2.12 — every pure-domain helper is ported and the `fuj fees` / `fuj reconcile` CLIs are wired. M3 closes the loop: it builds the **parity safety net** that proves Go output matches Python output for every ported function. Without it, M2 is "trust me", and the rewrite has no defensible cutover criterion. + +M3 has three deliverables: + +1. **A capture pipeline** (`scripts/capture_fixtures.py` + `scripts/scrub_fixtures.py`) that produces deterministic, PII-free JSON fixtures from the live Python implementations. +2. **A fixture corpus** at [go/tests/fixtures/](../../srv/personal/fuj-management/go/tests/fixtures/) covering the 10 pure functions of M2 (M2.1–M2.9) plus 10 reconcile cases spanning every code path of `reconcile()` (M2.10). +3. **A parity test runner** in [go/tests/parity/](../../srv/personal/fuj-management/go/tests/parity/) under `//go:build parity` that replays each fixture and asserts byte/value equality against the Go port. + +User-confirmed scope decisions: +- **Single MR** for all six sub-tasks (M3.1–M3.6) — they're tightly coupled; no half-state is committable. +- **Type envelope only where it matters** — four fields (`generate_sync_id.tx.amount`, `parse_czk_amount.val`, `format_date.val`, `infer_transaction_details.tx.date`) use `{"type":..., "value":...}` to disambiguate int/float/none. Everything else uses raw JSON. +- **Real seeds for `parse_month_references` and `match_members` only** — read curated message strings from `tmp/payments_transactions_cache.json`, scrub, ship. Other functions stay on handcrafted seeds. +- **Plan committed at `docs/plans/2026-05-06-2111-go-m3-fixture-capture.md`** — same convention as every M-series predecessor. + +## Branch + landing + +- Branch: `feat/m3-fixture-capture`. Single MR via `tea pr create`. 
Tick M3.1–M3.6 on merge with the SHA. +- No edits to existing Python or Go production code. M3 is purely additive: new scripts, new fixtures, new test files, new Makefile targets, README, CHANGELOG entry, plan archive, progress tracker tick. + +## File layout + +**Python (capture pipeline):** +- [scripts/capture_fixtures.py](../../srv/personal/fuj-management/scripts/capture_fixtures.py) — dispatcher CLI; one entry per function via `--func`. +- [scripts/scrub_fixtures.py](../../srv/personal/fuj-management/scripts/scrub_fixtures.py) — stdin→stdout deterministic bijection scrubber. +- [scripts/_fixture_seeds.py](../../srv/personal/fuj-management/scripts/_fixture_seeds.py) — internal: handcrafted seeds keyed by `(func, case_id)`, plus the curated real-message extractor. + +**Fixture corpus** (committed, PII-free): +- [go/tests/fixtures/README.md](../../srv/personal/fuj-management/go/tests/fixtures/README.md) — refresh workflow + scrubbing audit guide. +- `go/tests/fixtures/pure//.json` — one directory per function (10 functions: `normalize`, `parse_month_references`, `calculate_fee`, `calculate_junior_fee`, `parse_czk_amount`, `generate_sync_id`, `build_name_variants`, `match_members`, `infer_transaction_details`, `format_date`). +- `go/tests/fixtures/reconcile/_.json` — 10 numbered reconcile cases. + +**Go parity tests** (all under `//go:build parity`): +- [go/tests/parity/parityio.go](../../srv/personal/fuj-management/go/tests/parity/parityio.go) — shared loader with generic `Case[I,O]` walker, type envelopes mirrored from §3, float tolerance helper. +- [go/tests/parity/pure//_parity_test.go](../../srv/personal/fuj-management/go/tests/parity/pure/) — one file per function, ~30 lines each. +- [go/tests/parity/reconcile/reconcile_parity_test.go](../../srv/personal/fuj-management/go/tests/parity/reconcile/) — bespoke comparator using `math.Abs(got-want) <= 0.01` for `paid` floats, exact equality on int balances. 
+ +**Modified:** +- [Makefile](../../srv/personal/fuj-management/Makefile) — append `go-parity`, `go-test-all`, `capture-fixtures` targets. +- [CHANGELOG.md](../../srv/personal/fuj-management/CHANGELOG.md) — single entry at top. +- [docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md](../../srv/personal/fuj-management/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md) — tick M3.1–M3.6 with SHA. + +## Capture invocation interface + +Two-stage pipeline (capture | scrub) so each stage is independently debuggable: + +```bash +python scripts/capture_fixtures.py --func <func> --case <case_id> --input-seed <seed_id> \ + | python scripts/scrub_fixtures.py \ + > go/tests/fixtures/pure/<func>/<case_id>.json +``` + +Capture flags: +- `--func` — target function (`normalize`, `reconcile`, etc.). +- `--case` — human-authored case ID, becomes the file stem. Never auto-generated (auto-IDs cause git churn). +- `--input-seed <seed_id>` — pull from `_fixture_seeds.py` registry (the default mode for handcrafted cases). +- `--input-stdin` — read a single JSON `{"args":[...], "kwargs":{...}}` doc from stdin (used by the real-message extractor for `parse_month_references` / `match_members`). +- `--all` — iterate every seed for one function, emit newline-delimited JSON to stdout. Used by the `make capture-fixtures` recipe. + +Capture **never writes files**. Output goes to stdout; the caller redirects. The scrubber is always stdin→stdout. Both are pure transforms. + +The `make capture-fixtures` target codifies the full refresh workflow. Humans read the target before they read the README. + +## Fixture JSON shape (normative) + +One JSON object per case: + +```json +{ + "case": "range_wrap_nov_to_jan", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { ... }, + "output": { ... } +} +``` + +`captured_at` is date-only — same-day re-runs produce byte-identical files. No git SHA, no hostname, no time component. 
+ +### Per-function input/output schemas + +The schema is the **stable contract** between Python capture and Go consumption. Where Python returns heterogeneous types, the capture step pre-translates to the typed shape Go expects. + +| Function | Input | Output | +|---|---|---| +| `normalize` | `{"text":"…"}` | `{"text":"…"}` | +| `parse_month_references` | `{"text":"…","default_year":2026}` | `{"months":["2026-01",…]}` | +| `calculate_fee` | `{"attendance_count":3,"month_key":"2026-02"}` | `{"fee":750}` | +| `calculate_junior_fee` | `{"attendance_count":1,"month_key":"2026-02"}` | `{"value":0,"unknown":true}` (mirrors `fees.Expected{Value, Unknown}`) | +| `parse_czk_amount` | `{"val":<envelope>}` | `{"amount":1500.0}` | +| `generate_sync_id` | `{"tx":{"date":"…","amount":<envelope>,"currency":"CZK","sender":"…","vs":"…","message":"…","bank_id":"…"}}` | `{"sync_id":"…"}` | +| `_build_name_variants` | `{"full_name":"…"}` | `{"variants":["…"]}` | +| `match_members` | `{"text":"…","member_names":["…"]}` | `{"matches":[{"name":"…","confidence":"auto"}]}` | +| `infer_transaction_details` | `{"tx":{"sender":"…","message":"…","user_id":"…","date":<envelope>},"member_names":[…],"default_year":2026}` | `{"members":[…],"months":[…],"search_text":"…"}` | +| `format_date` | `{"val":<envelope>}` | `{"date":"…"}` | + +**Type envelope** (the `<envelope>` placeholder in the 4 fields above): + +```json +{"type":"int","value":750} // distinguishes 750 from 750.0 +{"type":"float","value":750.0} +{"type":"string","value":"…"} +{"type":"none"} +``` + +The envelope is the answer to the `generate_sync_id` parity risk: Python's `str(750.0) == "750.0"` vs `str(750) == "750"` produces different SHA-256 inputs. JSON natively conflates these; the envelope round-trips them. Go's loader switches on `type` and constructs the matching native value before calling the port. 
+ +**`reconcile`** uses raw JSON for everything (its inputs are typed maps/slices already), with one nuance: the `Member.fees[month]` value can be an `int` or a `(fee, count)` tuple per [match_payments.py:339-340](../../srv/personal/fuj-management/scripts/match_payments.py#L339). Capture normalises both to `{"fee":int,"count":int}` so Go side has one shape. + +## Scrubber strategy + +`scrub_fixtures.py`: stdin → stdout, no state, no salt, no random. Deterministic plain SHA-256. Re-runs are idempotent. Trade-off acknowledged: an attacker with the script can mathematically reverse the mapping. That's fine — the scrubber's job is to keep PII out of git diffs and Claude transcripts, not to defend against an adversary with the source tree. + +### Scramble whitelist (only these field keys are scrambled) + +`name`, `member_names[]`, `person`, `sender`, `sender_account`, `account`, `vs`, `bank_id`, `user_id`, `note`. Plus a per-document name-substring sweep over `message` strings — applied **before** the field-key walk, because real names show up embedded in message text. + +Everything else (dates, amounts, currency, `month_key`, `attendance_count`, `purpose`, `confidence`, `expected`, `paid`, `total_balance`, `fee`, all `YYYY-MM` keys, `match`/`matches` structure) is preserved verbatim. **Whitelist-of-scramble** (not blacklist-of-preserve): when a new field appears, it stays raw until someone explicitly adds it to the list. Fails safe. + +### Scrambling functions + +- **Names**: `Member_<8hex>` where `<8hex> = sha256(name).hexdigest()[:8]`. Same name → same pseudonym across the whole document and across all fixtures. Stable diffs. +- **Account numbers** (`[0-9]+/[0-9]{4}`): scramble prefix and bank-suffix separately, preserving length and format. +- **VS / bank_id / user_id**: digit-string-preserving hash to a same-length numeric token. Non-numeric input → `id_<8hex>`. +- **Note**: replaced verbatim with `""`. Notes are never load-bearing for any test. 
+- **Message** (free text): name-sweep applied; rest preserved. Corpus author spot-checks before commit. README §5 documents the audit grep. + +## Reconcile fixtures (10 handcrafted cases) + +All seeds live in `_fixture_seeds.py` as 5-tuples `(members, sorted_months, transactions, exceptions, default_year)`. Capture runs the live Python `reconcile()` and emits canonical JSON; scrubber is a no-op for handcrafted synthetic names but runs anyway for uniformity. + +| File | Branch exercised | +|---|---| +| `01_greedy_exact.json` | Greedy: amount == sum(expected); zero credit. | +| `02_greedy_overpayment_credit.json` | Greedy with overflow → credit. | +| `03_proportional_remainder.json` | Underpayment across 3 months with non-integer split (last month absorbs float remainder per [match_payments.py:421+](../../srv/personal/fuj-management/scripts/match_payments.py#L421)). | +| `04_even_split_prepayment.json` | All `expected == 0` → even-split fallback. | +| `05_out_of_window_credit.json` | Month outside `sorted_months` → that share goes to credits, in-window proportional for the rest. | +| `06_exception_override.json` | Exception entry overrides expected. | +| `07_other_purpose_split.json` | `purpose="other:tournament"` with two members. | +| `08_junior_question_mark.json` | Junior with attendance count 1 → `Expected{Unknown:true}`; reconcile reads it as 0 expected. | +| `09_multiperson_multimonth.json` | `person="Alice, Bob", purpose="2026-01, 2026-02"` → 2x2 fan-out: even-split-by-people then proportional-by-month. | +| `10_unmatched.json` | Empty `person`, garbage message → goes to `unmatched`. | + +The seed registry is the **single source of truth** for these inputs. If Python behaviour drifts intentionally, fixtures regenerate cleanly via `make capture-fixtures`. 
+ +## Real-data seeds (for `parse_month_references` and `match_members` only) + +`_fixture_seeds.py` reads `tmp/payments_transactions_cache.json` (already gitignored) and selects: + +- **`parse_month_references`**: ~15 distinct messages exercising the 45 Czech month declensions, range wraps (`"prosinec-leden"`), year inference, and the `m >= 10 → previous year` heuristic. Selection done once interactively, the chosen indices hardcoded into `_fixture_seeds.py` so re-runs are deterministic. Messages flow through capture (which calls `parse_month_references(msg, default_year=2026)`) then scrubber (name-sweep against the live member roster). +- **`match_members`**: ~10 distinct `(message, member_names)` pairs exercising auto vs review confidence, common-surname filter, exact-short-circuit. Same pipeline. + +**Out of scope for real seeds**: `normalize`, `_build_name_variants`, `reconcile`. These either don't benefit from real data (synthetic exhaustively covers `normalize`, `_build_name_variants`) or have surgical-input requirements that real data can't reliably hit (`reconcile`'s 10 branches). + +## Go parity-test layout + +One file per function, one Go package per function, mirroring the fixture tree. Each file is short (~30 lines): + +```go +//go:build parity + +package normalize_parity_test + +import ( + "fuj-management/go/internal/domain/czech" + "fuj-management/go/tests/parity" + "testing" +) + +func TestNormalizeParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/normalize", + func(in parity.NormalizeIn) parity.NormalizeOut { + return parity.NormalizeOut{Text: czech.Normalize(in.Text)} + }) +} +``` + +The shared [go/tests/parity/parityio.go](../../srv/personal/fuj-management/go/tests/parity/parityio.go) (also `//go:build parity`) provides: + +- `Case[I, O any]` generic loader: walks a fixture directory, decodes each `.json`, returns `(name, input, want)` triples. 
+- `RunAll[I, O any](t, dir, fn func(I) O)`: invokes `fn`, compares against `want` with `reflect.DeepEqual` (sorted-slice normalisation for the few sets-cast-to-lists Python returns); for floats uses `math.Abs(got-want) <= 0.01`. +- One typed `In` / `Out` struct pair per function (10 pairs), mirroring §3's JSON shape exactly. Envelope decoder helpers (`AmountEnvelope`, `ValueEnvelope`) live here. + +**Reconcile is bespoke** — `reconcile/reconcile_parity_test.go` doesn't use `RunAll` because it needs cell-by-cell tolerant float compare across nested maps. It walks the fixture dir directly. + +**Why one-file-per-function** (instead of an umbrella runner): each function lives in a different domain package, so tests must `import` a different package; an umbrella would obscure which package is being checked. Split also enables `go test -tags=parity ./tests/parity/pure/normalize/` to iterate on a single port. + +**Why a separate test tree** (instead of co-located parity tests): the M2 unit tests are co-located by convention (e.g. [go/internal/domain/czech/normalize_test.go](../../srv/personal/fuj-management/go/internal/domain/czech/normalize_test.go)). The progress tracker explicitly says fixtures live at `go/tests/fixtures/` and the gate is `go test -tags=parity ./tests/parity/pure/...`. Co-location would scatter fixtures across packages — messy. Separate tree wins. + +## Build tag + Makefile + +Every parity test file starts with `//go:build parity`. Default `make go-test` excludes them; `make go-parity` runs them: + +```makefile +go-parity: + cd $(GO_SRC) && go test -tags=parity ./tests/parity/... + +go-test-all: go-test go-parity + +capture-fixtures: $(PYTHON) + @# inline shell loop in the Makefile (no helper script): capture_fixtures.py --func <func> --all | scrub_fixtures.py for every registered function +``` + +Parity is **not** folded into default `go-test`: keeps the M2 unit-test loop fast, and a missing-fixture failure shouldn't block routine work. 
CI runs both targets independently so a parity break is a distinct red signal from a unit-test break. + +## README content (`go/tests/fixtures/README.md`) + +Six sections, ~120 lines: + +1. **What's in this tree** — directory map; one line per fixture function explaining what it validates. +2. **Fixture format** — link to schemas in §3; worked example for `parse_month_references` and one for `reconcile`. +3. **Refresh workflow** — `make capture-fixtures` regenerates everything; single-file recipe for incremental updates. Always diff before committing. +4. **When to refresh** — bullet list (schema change, new Czech declension, new fee tier, new reconcile branch). **Do not refresh to "fix" a parity failure** without first proving the Python behaviour is the intended one. +5. **Verifying scrubbing** — `git diff` should show only `Member_`-shaped names, `` notes, structurally-preserved account/VS digits. Audit grep: `git ls-files go/tests/fixtures | xargs grep -l ''` should return zero before commit. +6. **Adding a new fixture** — three steps (add to `_fixture_seeds.py`, run capture, add `In/Out` Go struct fields if needed). + +## Parity concerns + +- **Float arithmetic in reconcile proportional phase**: ordering-sensitive, may diverge between Python and Go due to FMA. Tolerance `0.01` already in [go/internal/domain/reconcile/reconcile_test.go](../../srv/personal/fuj-management/go/internal/domain/reconcile/reconcile_test.go); parity uses the same tolerance. +- **Sync-ID float-vs-int stringification**: handled by the envelope (§3). Capture two paired cases per amount value (`amount_750_int.json`, `amount_750_float.json`) so any Go-side conflation surfaces immediately. +- **NFKD edge cases**: capture set must include rare characters from real names. 
The handcrafted `normalize` seeds enumerate every distinct character observed in the live member roster (extracted once from `tmp/attendance_regular_cache.json`, hardcoded into `_fixture_seeds.py` as a single-character-per-case sweep). +- **Czech month declensions**: the real-message seeds for `parse_month_references` cover the wild; handcrafted seeds cover the corner cases (`prosinec-leden` wrap, `m >= 10` heuristic). +- **Insertion-order determinism in `reconcile`**: Python 3.7+ dict iteration is insertion-ordered; the seed registry preserves order. Go side iterates `sortedMonths` slice explicitly; the parity test verifies this. +- **`infer_transaction_details` default_year**: Python signature defaults to 2026; capture passes `default_year` as an explicit input. Go side reads it from the fixture. + +## Out of scope (explicitly DO NOT touch) + +- Real Google Sheets / Drive / Fio loader implementations — M4.1–M4.6. +- Web routes / handlers — M5. +- `fuj sync` and `fuj infer` subcommands — M4.7/M4.8. +- Tier-2 JSON-API parity (`cmd/parity/main.go`) — M5.4. +- Any change to existing Python code (capture is read-only against the production scripts). +- Any change to existing Go production code under `go/internal/`. + +## Verification + +1. `make go-build` — clean build (parity tests excluded by default tag). +2. `make go-test` — all M2 unit tests still green; no parity test runs. +3. `make go-parity` — every fixture in `go/tests/fixtures/pure/` and `go/tests/fixtures/reconcile/` deserialises and passes its parity assertion. +4. `make go-lint` — clean (parity test files lint-clean under `-tags=parity` since `golangci-lint` honours build tags via `.golangci.yml`). +5. **Capture round-trip**: pick one fixture (e.g. 
`parse_month_references/range_wrap_nov_to_jan.json`), regenerate via `python scripts/capture_fixtures.py --func parse_month_references --case range_wrap_nov_to_jan --input-seed range_wrap_nov_to_jan | python scripts/scrub_fixtures.py`, confirm byte-identical to the committed file. +6. **Scrubbing audit**: run the README §5 grep against any name from the live roster — zero hits. +7. **Reconcile branch coverage**: read each of the 10 reconcile fixture files, confirm the `output` field shows the expected branch (e.g. `02_greedy_overpayment_credit.json` has a non-zero `credits` entry; `04_even_split_prepayment.json` has equal `paid` across all months). +8. Append CHANGELOG entry per [CLAUDE.md](../../srv/personal/fuj-management/CLAUDE.md) (timestamp via `date "+%Y-%m-%d %H:%M %Z"`). +9. Tick M3.1–M3.6 in [docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md](../../srv/personal/fuj-management/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md) with the merge SHA. Update the M3 milestone summary line if M3 is now fully closed. +10. Push branch, open MR via `tea pr create --title "feat(go): fixture capture + characterization framework (M3)" --base main --head feat/m3-fixture-capture`, print URL, leave merge to user. 
+ +## Critical files + +- **Read for parity** — [scripts/czech_utils.py:22](../../scripts/czech_utils.py#L22), [scripts/czech_utils.py:28](../../scripts/czech_utils.py#L28), [scripts/attendance.py:91](../../scripts/attendance.py#L91), [scripts/attendance.py:100](../../scripts/attendance.py#L100), [scripts/infer_payments.py:17](../../scripts/infer_payments.py#L17), [scripts/sync_fio_to_sheets.py:62](../../scripts/sync_fio_to_sheets.py#L62), [scripts/match_payments.py:33](../../scripts/match_payments.py#L33), [scripts/match_payments.py:65](../../scripts/match_payments.py#L65), [scripts/match_payments.py:144](../../scripts/match_payments.py#L144), [scripts/match_payments.py:187](../../scripts/match_payments.py#L187), [scripts/match_payments.py:304](../../scripts/match_payments.py#L304). +- **Reuse** — `domain/czech.{Normalize, ParseMonthReferences}`, `domain/fees.{CalculateFee, CalculateJuniorFee, Expected}`, `domain/money.ParseCZK`, `domain/synch.GenerateSyncID`, `domain/matching.{BuildNameVariants, MatchMembers, InferTransactionDetails, FormatDate}`, `domain/reconcile.{Member, Transaction, ExceptionKey, Exception, Result, Reconcile}`. +- **Mirror conventions** — package layout from [go/internal/domain/matching/](../../go/internal/domain/matching/) (one symbol per file, top-of-test provenance comments, `t.Parallel()`, `// [Go]` markers for Go-only cases). +- **New** — `scripts/{capture_fixtures,scrub_fixtures,_fixture_seeds}.py`; `go/tests/fixtures/README.md` + the corpus; `go/tests/parity/parityio.go` + 10 parity test files + 1 reconcile parity test file. 
+- **Modify** — `Makefile` (3 new targets), `CHANGELOG.md` (1 entry), `docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md` (tick M3.1–M3.6). diff --git a/go/tests/fixtures/README.md b/go/tests/fixtures/README.md new file mode 100644 index 0000000..4225f68 --- /dev/null +++ b/go/tests/fixtures/README.md @@ -0,0 +1,128 @@ +# Parity Fixtures + +Captured outputs from the live Python implementation used as ground truth for +the Go parity test suite. All 98 files are committed and PII-free. + +## Directory layout + +``` +fixtures/ + pure/ + normalize/ # scripts.czech_utils.normalize + parse_month_references/ # scripts.czech_utils.parse_month_references + calculate_fee/ # scripts.attendance.calculate_fee + calculate_junior_fee/ # scripts.attendance.calculate_junior_fee + parse_czk_amount/ # scripts.infer_payments.parse_czk_amount + generate_sync_id/ # scripts.sync_fio_to_sheets.generate_sync_id + build_name_variants/ # scripts.match_payments._build_name_variants + match_members/ # scripts.match_payments.match_members + infer_transaction_details/ # scripts.match_payments.infer_transaction_details + format_date/ # scripts.match_payments.format_date + reconcile/ # scripts.match_payments.reconcile (10 branch-coverage cases) +``` + +## Fixture format + +One JSON object per file: + +```json +{ + "case": "range_wrap_nov_to_jan", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { "text": "...", "default_year": 2026 }, + "output": { "months": ["2025-11", "2025-12", "2026-01"] } +} +``` + +`captured_at` is date-only so same-day re-runs produce byte-identical files. 
+ +### Amount type envelope + +Four fields carry a type envelope to distinguish Python `int` / `float` / `None`: + +```json +{"type": "int", "value": 750} +{"type": "float", "value": 750.0} +{"type": "string", "value": "..."} +{"type": "none"} +``` + +Fields that use envelopes: `generate_sync_id.tx.amount`, `parse_czk_amount.val`, +`format_date.val`, `infer_transaction_details.tx.date`. + +### Reconcile member format + +Reconcile input members use a named dict to allow consistent PII scrubbing: + +```json +{"name": "Member_d035d9f9", "tier": "A", "fees": {"2026-01": [750, 3]}} +``` + +## Running the parity tests + +```bash +make go-parity # run all parity tests +make go-test-all # unit tests + parity tests +``` + +Or directly: + +```bash +cd go && go test -tags=parity ./tests/parity/... +cd go && go test -tags=parity -v -run TestReconcileParity ./tests/parity/reconcile/ +``` + +## Refresh workflow + +Regenerate the entire corpus from the live Python implementation: + +```bash +make capture-fixtures +git diff go/tests/fixtures/ # review changes before committing +``` + +To refresh a single function: + +```bash +PYTHONPATH=scripts:. python3 scripts/capture_fixtures.py --func normalize --all \ + | while IFS= read -r line; do + id=$(echo "$line" | python3 -c "import sys,json; print(json.load(sys.stdin)['case'])") + echo "$line" | python3 scripts/scrub_fixtures.py \ + > go/tests/fixtures/pure/normalize/${id}.json + done +``` + +## When to refresh + +- A ported function is intentionally changed to match updated Python behaviour. +- A new Czech declension or fee tier is added to the Python implementation. +- A new reconcile code path needs fixture coverage. + +**Do not refresh to silence a failing parity test** without first confirming that +the Python behaviour is the correct reference. A parity failure means either the +Go port diverges or the Python implementation changed — diagnose before regenerating. 
+ +## PII scrubbing audit + +No real member names should appear in committed fixtures. Before committing any +regenerated fixtures, verify with: + +```bash +# Replace with names from the real roster to check: +git ls-files go/tests/fixtures | xargs grep -l "Real Name Here" | head +``` + +The scrubber applies deterministic SHA-256 pseudonyms (`Member_<8hex>`) to all +PII fields. `match_members` and `infer_transaction_details` fixtures use a +synthetic roster of fictional names and are exempt from field-key scrubbing; +verify that no real roster names appear in their `member_names` arrays. + +## Adding a new fixture + +1. Add a seed to `scripts/_fixture_seeds.py` under `SEEDS[("func_name", "case_id")]`. +2. Add `In`/`Out` struct fields to `go/tests/parity/parityio.go` if the function + is new. +3. Run the single-function refresh recipe above and review the diff. +4. The parity test picks up new fixtures automatically — no test code changes needed + (unless the function itself is new). diff --git a/go/tests/fixtures/pure/build_name_variants/common_diacritics.json b/go/tests/fixtures/pure/build_name_variants/common_diacritics.json new file mode 100644 index 0000000..1c5436b --- /dev/null +++ b/go/tests/fixtures/pure/build_name_variants/common_diacritics.json @@ -0,0 +1,15 @@ +{ + "case": "common_diacritics", + "func": "scripts.match_payments._build_name_variants", + "captured_at": "2026-05-06", + "input": { + "full_name": "Alžběta Testovická" + }, + "output": { + "variants": [ + "alzbeta testovicka", + "testovicka", + "alzbeta" + ] + } +} diff --git a/go/tests/fixtures/pure/build_name_variants/full_name_no_nick.json b/go/tests/fixtures/pure/build_name_variants/full_name_no_nick.json new file mode 100644 index 0000000..a6fd2ab --- /dev/null +++ b/go/tests/fixtures/pure/build_name_variants/full_name_no_nick.json @@ -0,0 +1,15 @@ +{ + "case": "full_name_no_nick", + "func": "scripts.match_payments._build_name_variants", + "captured_at": "2026-05-06", + "input": { + 
"full_name": "Jan Novák" + }, + "output": { + "variants": [ + "jan novak", + "novak", + "jan" + ] + } +} diff --git a/go/tests/fixtures/pure/build_name_variants/short_name_filtered.json b/go/tests/fixtures/pure/build_name_variants/short_name_filtered.json new file mode 100644 index 0000000..dcca9b3 --- /dev/null +++ b/go/tests/fixtures/pure/build_name_variants/short_name_filtered.json @@ -0,0 +1,11 @@ +{ + "case": "short_name_filtered", + "func": "scripts.match_payments._build_name_variants", + "captured_at": "2026-05-06", + "input": { + "full_name": "Jo" + }, + "output": { + "variants": [] + } +} diff --git a/go/tests/fixtures/pure/build_name_variants/single_word.json b/go/tests/fixtures/pure/build_name_variants/single_word.json new file mode 100644 index 0000000..cd1d132 --- /dev/null +++ b/go/tests/fixtures/pure/build_name_variants/single_word.json @@ -0,0 +1,13 @@ +{ + "case": "single_word", + "func": "scripts.match_payments._build_name_variants", + "captured_at": "2026-05-06", + "input": { + "full_name": "Jáchym" + }, + "output": { + "variants": [ + "jachym" + ] + } +} diff --git a/go/tests/fixtures/pure/build_name_variants/three_word_name.json b/go/tests/fixtures/pure/build_name_variants/three_word_name.json new file mode 100644 index 0000000..6b9b354 --- /dev/null +++ b/go/tests/fixtures/pure/build_name_variants/three_word_name.json @@ -0,0 +1,16 @@ +{ + "case": "three_word_name", + "func": "scripts.match_payments._build_name_variants", + "captured_at": "2026-05-06", + "input": { + "full_name": "Jan Tomášek (Honza)" + }, + "output": { + "variants": [ + "jan tomasek", + "honza", + "tomasek", + "jan" + ] + } +} diff --git a/go/tests/fixtures/pure/build_name_variants/with_nickname.json b/go/tests/fixtures/pure/build_name_variants/with_nickname.json new file mode 100644 index 0000000..fa9fa05 --- /dev/null +++ b/go/tests/fixtures/pure/build_name_variants/with_nickname.json @@ -0,0 +1,16 @@ +{ + "case": "with_nickname", + "func": 
"scripts.match_payments._build_name_variants", + "captured_at": "2026-05-06", + "input": { + "full_name": "František Vrbík (Štrúdl)" + }, + "output": { + "variants": [ + "frantisek vrbik", + "strudl", + "vrbik", + "frantisek" + ] + } +} diff --git a/go/tests/fixtures/pure/calculate_fee/one_session.json b/go/tests/fixtures/pure/calculate_fee/one_session.json new file mode 100644 index 0000000..6e6e5ef --- /dev/null +++ b/go/tests/fixtures/pure/calculate_fee/one_session.json @@ -0,0 +1,12 @@ +{ + "case": "one_session", + "func": "scripts.attendance.calculate_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 1, + "month_key": "2026-01" + }, + "output": { + "fee": 200 + } +} diff --git a/go/tests/fixtures/pure/calculate_fee/three_sessions_known_rate.json b/go/tests/fixtures/pure/calculate_fee/three_sessions_known_rate.json new file mode 100644 index 0000000..3a2ded9 --- /dev/null +++ b/go/tests/fixtures/pure/calculate_fee/three_sessions_known_rate.json @@ -0,0 +1,12 @@ +{ + "case": "three_sessions_known_rate", + "func": "scripts.attendance.calculate_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 3, + "month_key": "2026-02" + }, + "output": { + "fee": 750 + } +} diff --git a/go/tests/fixtures/pure/calculate_fee/two_sessions_default_fallback.json b/go/tests/fixtures/pure/calculate_fee/two_sessions_default_fallback.json new file mode 100644 index 0000000..fc0b814 --- /dev/null +++ b/go/tests/fixtures/pure/calculate_fee/two_sessions_default_fallback.json @@ -0,0 +1,12 @@ +{ + "case": "two_sessions_default_fallback", + "func": "scripts.attendance.calculate_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 2, + "month_key": "2099-01" + }, + "output": { + "fee": 700 + } +} diff --git a/go/tests/fixtures/pure/calculate_fee/two_sessions_known_rate.json b/go/tests/fixtures/pure/calculate_fee/two_sessions_known_rate.json new file mode 100644 index 0000000..5e8186e --- /dev/null +++ 
b/go/tests/fixtures/pure/calculate_fee/two_sessions_known_rate.json @@ -0,0 +1,12 @@ +{ + "case": "two_sessions_known_rate", + "func": "scripts.attendance.calculate_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 2, + "month_key": "2026-01" + }, + "output": { + "fee": 750 + } +} diff --git a/go/tests/fixtures/pure/calculate_fee/two_sessions_reduced_march.json b/go/tests/fixtures/pure/calculate_fee/two_sessions_reduced_march.json new file mode 100644 index 0000000..bee6239 --- /dev/null +++ b/go/tests/fixtures/pure/calculate_fee/two_sessions_reduced_march.json @@ -0,0 +1,12 @@ +{ + "case": "two_sessions_reduced_march", + "func": "scripts.attendance.calculate_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 2, + "month_key": "2026-03" + }, + "output": { + "fee": 350 + } +} diff --git a/go/tests/fixtures/pure/calculate_fee/zero_sessions.json b/go/tests/fixtures/pure/calculate_fee/zero_sessions.json new file mode 100644 index 0000000..c138d7d --- /dev/null +++ b/go/tests/fixtures/pure/calculate_fee/zero_sessions.json @@ -0,0 +1,12 @@ +{ + "case": "zero_sessions", + "func": "scripts.attendance.calculate_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 0, + "month_key": "2026-01" + }, + "output": { + "fee": 0 + } +} diff --git a/go/tests/fixtures/pure/calculate_junior_fee/one_session_unknown.json b/go/tests/fixtures/pure/calculate_junior_fee/one_session_unknown.json new file mode 100644 index 0000000..89fda4e --- /dev/null +++ b/go/tests/fixtures/pure/calculate_junior_fee/one_session_unknown.json @@ -0,0 +1,13 @@ +{ + "case": "one_session_unknown", + "func": "scripts.attendance.calculate_junior_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 1, + "month_key": "2026-01" + }, + "output": { + "value": 0, + "unknown": true + } +} diff --git a/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_default.json b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_default.json 
new file mode 100644 index 0000000..887ba1a --- /dev/null +++ b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_default.json @@ -0,0 +1,13 @@ +{ + "case": "two_sessions_default", + "func": "scripts.attendance.calculate_junior_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 2, + "month_key": "2026-01" + }, + "output": { + "value": 500, + "unknown": false + } +} diff --git a/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_default_fallback.json b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_default_fallback.json new file mode 100644 index 0000000..8a629a8 --- /dev/null +++ b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_default_fallback.json @@ -0,0 +1,13 @@ +{ + "case": "two_sessions_default_fallback", + "func": "scripts.attendance.calculate_junior_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 2, + "month_key": "2099-06" + }, + "output": { + "value": 500, + "unknown": false + } +} diff --git a/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_reduced_march.json b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_reduced_march.json new file mode 100644 index 0000000..38408ac --- /dev/null +++ b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_reduced_march.json @@ -0,0 +1,13 @@ +{ + "case": "two_sessions_reduced_march", + "func": "scripts.attendance.calculate_junior_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 2, + "month_key": "2026-03" + }, + "output": { + "value": 250, + "unknown": false + } +} diff --git a/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_reduced_sep.json b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_reduced_sep.json new file mode 100644 index 0000000..69fc944 --- /dev/null +++ b/go/tests/fixtures/pure/calculate_junior_fee/two_sessions_reduced_sep.json @@ -0,0 +1,13 @@ +{ + "case": "two_sessions_reduced_sep", + "func": "scripts.attendance.calculate_junior_fee", + "captured_at": 
"2026-05-06", + "input": { + "attendance_count": 2, + "month_key": "2025-09" + }, + "output": { + "value": 250, + "unknown": false + } +} diff --git a/go/tests/fixtures/pure/calculate_junior_fee/zero_sessions.json b/go/tests/fixtures/pure/calculate_junior_fee/zero_sessions.json new file mode 100644 index 0000000..fc954de --- /dev/null +++ b/go/tests/fixtures/pure/calculate_junior_fee/zero_sessions.json @@ -0,0 +1,13 @@ +{ + "case": "zero_sessions", + "func": "scripts.attendance.calculate_junior_fee", + "captured_at": "2026-05-06", + "input": { + "attendance_count": 0, + "month_key": "2026-01" + }, + "output": { + "value": 0, + "unknown": false + } +} diff --git a/go/tests/fixtures/pure/format_date/empty_string.json b/go/tests/fixtures/pure/format_date/empty_string.json new file mode 100644 index 0000000..62871b4 --- /dev/null +++ b/go/tests/fixtures/pure/format_date/empty_string.json @@ -0,0 +1,14 @@ +{ + "case": "empty_string", + "func": "scripts.match_payments.format_date", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "" + } + }, + "output": { + "date": "" + } +} diff --git a/go/tests/fixtures/pure/format_date/none_value.json b/go/tests/fixtures/pure/format_date/none_value.json new file mode 100644 index 0000000..a4ae92c --- /dev/null +++ b/go/tests/fixtures/pure/format_date/none_value.json @@ -0,0 +1,13 @@ +{ + "case": "none_value", + "func": "scripts.match_payments.format_date", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "none" + } + }, + "output": { + "date": "" + } +} diff --git a/go/tests/fixtures/pure/format_date/serial_float.json b/go/tests/fixtures/pure/format_date/serial_float.json new file mode 100644 index 0000000..6d7b596 --- /dev/null +++ b/go/tests/fixtures/pure/format_date/serial_float.json @@ -0,0 +1,14 @@ +{ + "case": "serial_float", + "func": "scripts.match_payments.format_date", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "float", + "value": 46027.5 + } + }, + 
"output": { + "date": "2026-01-05" + } +} diff --git a/go/tests/fixtures/pure/format_date/serial_float_exact.json b/go/tests/fixtures/pure/format_date/serial_float_exact.json new file mode 100644 index 0000000..2d5ed93 --- /dev/null +++ b/go/tests/fixtures/pure/format_date/serial_float_exact.json @@ -0,0 +1,14 @@ +{ + "case": "serial_float_exact", + "func": "scripts.match_payments.format_date", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "float", + "value": 45957.0 + } + }, + "output": { + "date": "2025-10-27" + } +} diff --git a/go/tests/fixtures/pure/format_date/serial_int.json b/go/tests/fixtures/pure/format_date/serial_int.json new file mode 100644 index 0000000..271555a --- /dev/null +++ b/go/tests/fixtures/pure/format_date/serial_int.json @@ -0,0 +1,14 @@ +{ + "case": "serial_int", + "func": "scripts.match_payments.format_date", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "int", + "value": 46027 + } + }, + "output": { + "date": "2026-01-05" + } +} diff --git a/go/tests/fixtures/pure/format_date/string_iso.json b/go/tests/fixtures/pure/format_date/string_iso.json new file mode 100644 index 0000000..430cf1b --- /dev/null +++ b/go/tests/fixtures/pure/format_date/string_iso.json @@ -0,0 +1,14 @@ +{ + "case": "string_iso", + "func": "scripts.match_payments.format_date", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "2026-01-15" + } + }, + "output": { + "date": "2026-01-15" + } +} diff --git a/go/tests/fixtures/pure/format_date/string_non_iso.json b/go/tests/fixtures/pure/format_date/string_non_iso.json new file mode 100644 index 0000000..096d10f --- /dev/null +++ b/go/tests/fixtures/pure/format_date/string_non_iso.json @@ -0,0 +1,14 @@ +{ + "case": "string_non_iso", + "func": "scripts.match_payments.format_date", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "garbage" + } + }, + "output": { + "date": "garbage" + } +} diff --git 
a/go/tests/fixtures/pure/generate_sync_id/empty_fields.json b/go/tests/fixtures/pure/generate_sync_id/empty_fields.json new file mode 100644 index 0000000..7ccd7ae --- /dev/null +++ b/go/tests/fixtures/pure/generate_sync_id/empty_fields.json @@ -0,0 +1,22 @@ +{ + "case": "empty_fields", + "func": "scripts.sync_fio_to_sheets.generate_sync_id", + "captured_at": "2026-05-06", + "input": { + "tx": { + "date": "2026-03-01", + "amount": { + "type": "float", + "value": 0.0 + }, + "currency": "CZK", + "sender": "", + "vs": "", + "message": "", + "bank_id": "" + } + }, + "output": { + "sync_id": "80d5f2762dbe807adde8dab64c3f3f00936ceafc75d4ceba232b08c09bb71c60" + } +} diff --git a/go/tests/fixtures/pure/generate_sync_id/integer_amount.json b/go/tests/fixtures/pure/generate_sync_id/integer_amount.json new file mode 100644 index 0000000..6c13ef5 --- /dev/null +++ b/go/tests/fixtures/pure/generate_sync_id/integer_amount.json @@ -0,0 +1,22 @@ +{ + "case": "integer_amount", + "func": "scripts.sync_fio_to_sheets.generate_sync_id", + "captured_at": "2026-05-06", + "input": { + "tx": { + "date": "2026-01-15", + "amount": { + "type": "int", + "value": 750 + }, + "currency": "CZK", + "sender": "Member_9b16314c", + "vs": "864722", + "message": "pausal leden", + "bank_id": "983770300" + } + }, + "output": { + "sync_id": "155e983a0a3a11210e19728c427395f6681ee5d2a0ef3b60438e6efeaf3775df" + } +} diff --git a/go/tests/fixtures/pure/generate_sync_id/large_amount.json b/go/tests/fixtures/pure/generate_sync_id/large_amount.json new file mode 100644 index 0000000..c8692a0 --- /dev/null +++ b/go/tests/fixtures/pure/generate_sync_id/large_amount.json @@ -0,0 +1,22 @@ +{ + "case": "large_amount", + "func": "scripts.sync_fio_to_sheets.generate_sync_id", + "captured_at": "2026-05-06", + "input": { + "tx": { + "date": "2025-10-05", + "amount": { + "type": "float", + "value": 2100.0 + }, + "currency": "CZK", + "sender": "Member_bd5eb92a", + "vs": "110515", + "message": "FUJ treninky", + "bank_id": 
"609470745" + } + }, + "output": { + "sync_id": "639d98f8ab8e6954b7e4d31508936cc4366ee0281eebc860338585cdeda43ae3" + } +} diff --git a/go/tests/fixtures/pure/generate_sync_id/missing_currency.json b/go/tests/fixtures/pure/generate_sync_id/missing_currency.json new file mode 100644 index 0000000..b6e74e5 --- /dev/null +++ b/go/tests/fixtures/pure/generate_sync_id/missing_currency.json @@ -0,0 +1,21 @@ +{ + "case": "missing_currency", + "func": "scripts.sync_fio_to_sheets.generate_sync_id", + "captured_at": "2026-05-06", + "input": { + "tx": { + "date": "2026-02-01", + "amount": { + "type": "float", + "value": 500.0 + }, + "sender": "Member_32a79b03", + "vs": "720261", + "message": "trenink", + "bank_id": "072657565" + } + }, + "output": { + "sync_id": "8bd2cc2c2e6b376ad2d2501f72ee5d987fdca37662c4be0b9bb5345dcb28553d" + } +} diff --git a/go/tests/fixtures/pure/generate_sync_id/typical_float_amount.json b/go/tests/fixtures/pure/generate_sync_id/typical_float_amount.json new file mode 100644 index 0000000..a70e7e0 --- /dev/null +++ b/go/tests/fixtures/pure/generate_sync_id/typical_float_amount.json @@ -0,0 +1,22 @@ +{ + "case": "typical_float_amount", + "func": "scripts.sync_fio_to_sheets.generate_sync_id", + "captured_at": "2026-05-06", + "input": { + "tx": { + "date": "2026-01-15", + "amount": { + "type": "float", + "value": 750.0 + }, + "currency": "CZK", + "sender": "Member_9b16314c", + "vs": "864722", + "message": "pausal leden", + "bank_id": "983770300" + } + }, + "output": { + "sync_id": "155e983a0a3a11210e19728c427395f6681ee5d2a0ef3b60438e6efeaf3775df" + } +} diff --git a/go/tests/fixtures/pure/infer_transaction_details/member_in_message.json b/go/tests/fixtures/pure/infer_transaction_details/member_in_message.json new file mode 100644 index 0000000..66743d7 --- /dev/null +++ b/go/tests/fixtures/pure/infer_transaction_details/member_in_message.json @@ -0,0 +1,36 @@ +{ + "case": "member_in_message", + "func": "scripts.match_payments.infer_transaction_details", + 
"captured_at": "2026-05-06", + "input": { + "tx": { + "sender": "Test Payer", + "message": "alzbeta testovicka leden 2026", + "user_id": "", + "date": { + "type": "string", + "value": "2026-01-15" + } + }, + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ], + "default_year": 2026 + }, + "output": { + "matches": [ + { + "name": "Alžběta Testovická", + "confidence": "auto" + } + ], + "months": [ + "2026-01" + ], + "search_text": "Test Payer alzbeta testovicka leden 2026 " + } +} diff --git a/go/tests/fixtures/pure/infer_transaction_details/member_in_sender.json b/go/tests/fixtures/pure/infer_transaction_details/member_in_sender.json new file mode 100644 index 0000000..fa94487 --- /dev/null +++ b/go/tests/fixtures/pure/infer_transaction_details/member_in_sender.json @@ -0,0 +1,36 @@ +{ + "case": "member_in_sender", + "func": "scripts.match_payments.infer_transaction_details", + "captured_at": "2026-05-06", + "input": { + "tx": { + "sender": "Tomáš Fiktivný", + "message": "FUJ trenink", + "user_id": "", + "date": { + "type": "string", + "value": "2026-02-01" + } + }, + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ], + "default_year": 2026 + }, + "output": { + "matches": [ + { + "name": "Tomáš Fiktivný (Tov)", + "confidence": "auto" + } + ], + "months": [ + "2026-02" + ], + "search_text": "Tomáš Fiktivný FUJ trenink " + } +} diff --git a/go/tests/fixtures/pure/infer_transaction_details/month_fallback_from_date.json b/go/tests/fixtures/pure/infer_transaction_details/month_fallback_from_date.json new file mode 100644 index 0000000..28c5026 --- /dev/null +++ b/go/tests/fixtures/pure/infer_transaction_details/month_fallback_from_date.json @@ -0,0 +1,36 @@ +{ + "case": "month_fallback_from_date", + "func": "scripts.match_payments.infer_transaction_details", + "captured_at": "2026-05-06", + "input": { + "tx": 
{ + "sender": "Alžběta Testovická", + "message": "platba", + "user_id": "", + "date": { + "type": "string", + "value": "2026-03-15" + } + }, + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ], + "default_year": 2026 + }, + "output": { + "matches": [ + { + "name": "Alžběta Testovická", + "confidence": "auto" + } + ], + "months": [ + "2026-03" + ], + "search_text": "Alžběta Testovická platba " + } +} diff --git a/go/tests/fixtures/pure/infer_transaction_details/no_member_no_month.json b/go/tests/fixtures/pure/infer_transaction_details/no_member_no_month.json new file mode 100644 index 0000000..2fcf29a --- /dev/null +++ b/go/tests/fixtures/pure/infer_transaction_details/no_member_no_month.json @@ -0,0 +1,28 @@ +{ + "case": "no_member_no_month", + "func": "scripts.match_payments.infer_transaction_details", + "captured_at": "2026-05-06", + "input": { + "tx": { + "sender": "Unknown Person", + "message": "random text", + "user_id": "", + "date": { + "type": "none" + } + }, + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ], + "default_year": 2026 + }, + "output": { + "matches": [], + "months": [], + "search_text": "Unknown Person random text " + } +} diff --git a/go/tests/fixtures/pure/infer_transaction_details/serial_date.json b/go/tests/fixtures/pure/infer_transaction_details/serial_date.json new file mode 100644 index 0000000..9854abe --- /dev/null +++ b/go/tests/fixtures/pure/infer_transaction_details/serial_date.json @@ -0,0 +1,36 @@ +{ + "case": "serial_date", + "func": "scripts.match_payments.infer_transaction_details", + "captured_at": "2026-05-06", + "input": { + "tx": { + "sender": "Jana Nováková", + "message": "leden", + "user_id": "", + "date": { + "type": "float", + "value": 46027.0 + } + }, + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana 
Nováková", + "Adam Novák" + ], + "default_year": 2026 + }, + "output": { + "matches": [ + { + "name": "Jana Nováková", + "confidence": "auto" + } + ], + "months": [ + "2026-01" + ], + "search_text": "Jana Nováková leden " + } +} diff --git a/go/tests/fixtures/pure/match_members/common_surname_no_match.json b/go/tests/fixtures/pure/match_members/common_surname_no_match.json new file mode 100644 index 0000000..b6d6010 --- /dev/null +++ b/go/tests/fixtures/pure/match_members/common_surname_no_match.json @@ -0,0 +1,18 @@ +{ + "case": "common_surname_no_match", + "func": "scripts.match_payments.match_members", + "captured_at": "2026-05-06", + "input": { + "text": "novak leden", + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ] + }, + "output": { + "matches": [] + } +} diff --git a/go/tests/fixtures/pure/match_members/exact_full_name.json b/go/tests/fixtures/pure/match_members/exact_full_name.json new file mode 100644 index 0000000..b8517bc --- /dev/null +++ b/go/tests/fixtures/pure/match_members/exact_full_name.json @@ -0,0 +1,23 @@ +{ + "case": "exact_full_name", + "func": "scripts.match_payments.match_members", + "captured_at": "2026-05-06", + "input": { + "text": "platba od alzbeta testovicka leden", + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ] + }, + "output": { + "matches": [ + { + "name": "Alžběta Testovická", + "confidence": "auto" + } + ] + } +} diff --git a/go/tests/fixtures/pure/match_members/first_and_last.json b/go/tests/fixtures/pure/match_members/first_and_last.json new file mode 100644 index 0000000..dff25d3 --- /dev/null +++ b/go/tests/fixtures/pure/match_members/first_and_last.json @@ -0,0 +1,23 @@ +{ + "case": "first_and_last", + "func": "scripts.match_payments.match_members", + "captured_at": "2026-05-06", + "input": { + "text": "jan nový payment tomas fiktivny", + 
"member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ] + }, + "output": { + "matches": [ + { + "name": "Tomáš Fiktivný (Tov)", + "confidence": "auto" + } + ] + } +} diff --git a/go/tests/fixtures/pure/match_members/nickname_match.json b/go/tests/fixtures/pure/match_members/nickname_match.json new file mode 100644 index 0000000..a5c7315 --- /dev/null +++ b/go/tests/fixtures/pure/match_members/nickname_match.json @@ -0,0 +1,23 @@ +{ + "case": "nickname_match", + "func": "scripts.match_payments.match_members", + "captured_at": "2026-05-06", + "input": { + "text": "payment from strudl", + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ] + }, + "output": { + "matches": [ + { + "name": "Pavel Smutný (Štrúdl)", + "confidence": "auto" + } + ] + } +} diff --git a/go/tests/fixtures/pure/match_members/no_match.json b/go/tests/fixtures/pure/match_members/no_match.json new file mode 100644 index 0000000..7b7732e --- /dev/null +++ b/go/tests/fixtures/pure/match_members/no_match.json @@ -0,0 +1,18 @@ +{ + "case": "no_match", + "func": "scripts.match_payments.match_members", + "captured_at": "2026-05-06", + "input": { + "text": "xyz platba", + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ] + }, + "output": { + "matches": [] + } +} diff --git a/go/tests/fixtures/pure/match_members/review_lastname_only.json b/go/tests/fixtures/pure/match_members/review_lastname_only.json new file mode 100644 index 0000000..1527a1c --- /dev/null +++ b/go/tests/fixtures/pure/match_members/review_lastname_only.json @@ -0,0 +1,23 @@ +{ + "case": "review_lastname_only", + "func": "scripts.match_payments.match_members", + "captured_at": "2026-05-06", + "input": { + "text": "testovicka leden", + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", 
+ "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ] + }, + "output": { + "matches": [ + { + "name": "Alžběta Testovická", + "confidence": "review" + } + ] + } +} diff --git a/go/tests/fixtures/pure/match_members/two_members_exact.json b/go/tests/fixtures/pure/match_members/two_members_exact.json new file mode 100644 index 0000000..41b0662 --- /dev/null +++ b/go/tests/fixtures/pure/match_members/two_members_exact.json @@ -0,0 +1,27 @@ +{ + "case": "two_members_exact", + "func": "scripts.match_payments.match_members", + "captured_at": "2026-05-06", + "input": { + "text": "pavel smutny a alzbeta testovicka", + "member_names": [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák" + ] + }, + "output": { + "matches": [ + { + "name": "Alžběta Testovická", + "confidence": "auto" + }, + { + "name": "Pavel Smutný (Štrúdl)", + "confidence": "auto" + } + ] + } +} diff --git a/go/tests/fixtures/pure/normalize/czech_basic.json b/go/tests/fixtures/pure/normalize/czech_basic.json new file mode 100644 index 0000000..66e12b4 --- /dev/null +++ b/go/tests/fixtures/pure/normalize/czech_basic.json @@ -0,0 +1,11 @@ +{ + "case": "czech_basic", + "func": "scripts.czech_utils.normalize", + "captured_at": "2026-05-06", + "input": { + "text": "štefan čakrtový" + }, + "output": { + "text": "stefan cakrtovy" + } +} diff --git a/go/tests/fixtures/pure/normalize/czech_full_set.json b/go/tests/fixtures/pure/normalize/czech_full_set.json new file mode 100644 index 0000000..734ef55 --- /dev/null +++ b/go/tests/fixtures/pure/normalize/czech_full_set.json @@ -0,0 +1,11 @@ +{ + "case": "czech_full_set", + "func": "scripts.czech_utils.normalize", + "captured_at": "2026-05-06", + "input": { + "text": "áčďéěíňóřšťůúýžÁČĎÉĚÍŇÓŘŠŤŮÚÝŽ" + }, + "output": { + "text": "acdeeinorstuuyzacdeeinorstuuyz" + } +} diff --git a/go/tests/fixtures/pure/normalize/digits_symbols.json b/go/tests/fixtures/pure/normalize/digits_symbols.json new file mode 
100644 index 0000000..f6c7b1a --- /dev/null +++ b/go/tests/fixtures/pure/normalize/digits_symbols.json @@ -0,0 +1,11 @@ +{ + "case": "digits_symbols", + "func": "scripts.czech_utils.normalize", + "captured_at": "2026-05-06", + "input": { + "text": "FUJ2026! +3" + }, + "output": { + "text": "fuj2026! +3" + } +} diff --git a/go/tests/fixtures/pure/normalize/empty_string.json b/go/tests/fixtures/pure/normalize/empty_string.json new file mode 100644 index 0000000..a0f878e --- /dev/null +++ b/go/tests/fixtures/pure/normalize/empty_string.json @@ -0,0 +1,11 @@ +{ + "case": "empty_string", + "func": "scripts.czech_utils.normalize", + "captured_at": "2026-05-06", + "input": { + "text": "" + }, + "output": { + "text": "" + } +} diff --git a/go/tests/fixtures/pure/normalize/mixed_case.json b/go/tests/fixtures/pure/normalize/mixed_case.json new file mode 100644 index 0000000..3a3b995 --- /dev/null +++ b/go/tests/fixtures/pure/normalize/mixed_case.json @@ -0,0 +1,11 @@ +{ + "case": "mixed_case", + "func": "scripts.czech_utils.normalize", + "captured_at": "2026-05-06", + "input": { + "text": "Henrietta OTTOVÁ" + }, + "output": { + "text": "henrietta ottova" + } +} diff --git a/go/tests/fixtures/pure/normalize/simple_ascii.json b/go/tests/fixtures/pure/normalize/simple_ascii.json new file mode 100644 index 0000000..590a624 --- /dev/null +++ b/go/tests/fixtures/pure/normalize/simple_ascii.json @@ -0,0 +1,11 @@ +{ + "case": "simple_ascii", + "func": "scripts.czech_utils.normalize", + "captured_at": "2026-05-06", + "input": { + "text": "hello world" + }, + "output": { + "text": "hello world" + } +} diff --git a/go/tests/fixtures/pure/normalize/with_parens.json b/go/tests/fixtures/pure/normalize/with_parens.json new file mode 100644 index 0000000..8486076 --- /dev/null +++ b/go/tests/fixtures/pure/normalize/with_parens.json @@ -0,0 +1,11 @@ +{ + "case": "with_parens", + "func": "scripts.czech_utils.normalize", + "captured_at": "2026-05-06", + "input": { + "text": "Pavel Smutný 
(Štrúdl)" + }, + "output": { + "text": "pavel smutny (strudl)" + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/czech_comma_decimal.json b/go/tests/fixtures/pure/parse_czk_amount/czech_comma_decimal.json new file mode 100644 index 0000000..dfddfea --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/czech_comma_decimal.json @@ -0,0 +1,14 @@ +{ + "case": "czech_comma_decimal", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "1.500,00" + } + }, + "output": { + "amount": 1500.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/czech_comma_no_thousands.json b/go/tests/fixtures/pure/parse_czk_amount/czech_comma_no_thousands.json new file mode 100644 index 0000000..699d072 --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/czech_comma_no_thousands.json @@ -0,0 +1,14 @@ +{ + "case": "czech_comma_no_thousands", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "750,00" + } + }, + "output": { + "amount": 750.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/dot_decimal.json b/go/tests/fixtures/pure/parse_czk_amount/dot_decimal.json new file mode 100644 index 0000000..5635377 --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/dot_decimal.json @@ -0,0 +1,14 @@ +{ + "case": "dot_decimal", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "1500.00" + } + }, + "output": { + "amount": 1500.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/dot_thousand_separator.json b/go/tests/fixtures/pure/parse_czk_amount/dot_thousand_separator.json new file mode 100644 index 0000000..c3e76c8 --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/dot_thousand_separator.json @@ -0,0 +1,14 @@ +{ + "case": "dot_thousand_separator", + "func": 
"scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "1.500" + } + }, + "output": { + "amount": 1.5 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/empty_string.json b/go/tests/fixtures/pure/parse_czk_amount/empty_string.json new file mode 100644 index 0000000..7bc3b2b --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/empty_string.json @@ -0,0 +1,14 @@ +{ + "case": "empty_string", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "" + } + }, + "output": { + "amount": 0.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/none_value.json b/go/tests/fixtures/pure/parse_czk_amount/none_value.json new file mode 100644 index 0000000..f60e2ea --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/none_value.json @@ -0,0 +1,13 @@ +{ + "case": "none_value", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "none" + } + }, + "output": { + "amount": 0.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/plain_float.json b/go/tests/fixtures/pure/parse_czk_amount/plain_float.json new file mode 100644 index 0000000..8fa26ec --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/plain_float.json @@ -0,0 +1,14 @@ +{ + "case": "plain_float", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "float", + "value": 750.0 + } + }, + "output": { + "amount": 750.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/plain_int.json b/go/tests/fixtures/pure/parse_czk_amount/plain_int.json new file mode 100644 index 0000000..c124d21 --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/plain_int.json @@ -0,0 +1,14 @@ +{ + "case": "plain_int", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + 
"input": { + "val": { + "type": "int", + "value": 750 + } + }, + "output": { + "amount": 750.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/space_thousands.json b/go/tests/fixtures/pure/parse_czk_amount/space_thousands.json new file mode 100644 index 0000000..6295094 --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/space_thousands.json @@ -0,0 +1,14 @@ +{ + "case": "space_thousands", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "1 500" + } + }, + "output": { + "amount": 1500.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/with_czk_suffix.json b/go/tests/fixtures/pure/parse_czk_amount/with_czk_suffix.json new file mode 100644 index 0000000..d5cf52c --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/with_czk_suffix.json @@ -0,0 +1,14 @@ +{ + "case": "with_czk_suffix", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "1500CZK" + } + }, + "output": { + "amount": 1500.0 + } +} diff --git a/go/tests/fixtures/pure/parse_czk_amount/with_kc_suffix.json b/go/tests/fixtures/pure/parse_czk_amount/with_kc_suffix.json new file mode 100644 index 0000000..81c8526 --- /dev/null +++ b/go/tests/fixtures/pure/parse_czk_amount/with_kc_suffix.json @@ -0,0 +1,14 @@ +{ + "case": "with_kc_suffix", + "func": "scripts.infer_payments.parse_czk_amount", + "captured_at": "2026-05-06", + "input": { + "val": { + "type": "string", + "value": "750 Kč" + } + }, + "output": { + "amount": 750.0 + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/empty_string.json b/go/tests/fixtures/pure/parse_month_references/empty_string.json new file mode 100644 index 0000000..6c55e00 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/empty_string.json @@ -0,0 +1,12 @@ +{ + "case": "empty_string", + "func": "scripts.czech_utils.parse_month_references", + 
"captured_at": "2026-05-06", + "input": { + "text": "", + "default_year": 2026 + }, + "output": { + "months": [] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/mixed_czech_numeric.json b/go/tests/fixtures/pure/parse_month_references/mixed_czech_numeric.json new file mode 100644 index 0000000..830f262 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/mixed_czech_numeric.json @@ -0,0 +1,16 @@ +{ + "case": "mixed_czech_numeric", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "leden+únor+03/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-01", + "2026-02", + "2026-03" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/no_month_found.json b/go/tests/fixtures/pure/parse_month_references/no_month_found.json new file mode 100644 index 0000000..31054fd --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/no_month_found.json @@ -0,0 +1,12 @@ +{ + "case": "no_month_found", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "random text without months", + "default_year": 2026 + }, + "output": { + "months": [] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/numeric_dot_format.json b/go/tests/fixtures/pure/parse_month_references/numeric_dot_format.json new file mode 100644 index 0000000..849acce --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/numeric_dot_format.json @@ -0,0 +1,14 @@ +{ + "case": "numeric_dot_format", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "12.2025", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/numeric_plus_multi.json b/go/tests/fixtures/pure/parse_month_references/numeric_plus_multi.json new file mode 100644 index 0000000..e508368 --- /dev/null +++ 
b/go/tests/fixtures/pure/parse_month_references/numeric_plus_multi.json @@ -0,0 +1,15 @@ +{ + "case": "numeric_plus_multi", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "11+12/2025", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-11", + "2025-12" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/numeric_slash_four_digit_year.json b/go/tests/fixtures/pure/parse_month_references/numeric_slash_four_digit_year.json new file mode 100644 index 0000000..f079063 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/numeric_slash_four_digit_year.json @@ -0,0 +1,14 @@ +{ + "case": "numeric_slash_four_digit_year", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "1/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/numeric_slash_leading_zero.json b/go/tests/fixtures/pure/parse_month_references/numeric_slash_leading_zero.json new file mode 100644 index 0000000..ec358dd --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/numeric_slash_leading_zero.json @@ -0,0 +1,14 @@ +{ + "case": "numeric_slash_leading_zero", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "03/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-03" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/numeric_slash_two_digit_year.json b/go/tests/fixtures/pure/parse_month_references/numeric_slash_two_digit_year.json new file mode 100644 index 0000000..219bc5c --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/numeric_slash_two_digit_year.json @@ -0,0 +1,14 @@ +{ + "case": "numeric_slash_two_digit_year", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "01/26", + 
"default_year": 2026 + }, + "output": { + "months": [ + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/range_no_wrap_leden_unor.json b/go/tests/fixtures/pure/parse_month_references/range_no_wrap_leden_unor.json new file mode 100644 index 0000000..b69dc11 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/range_no_wrap_leden_unor.json @@ -0,0 +1,15 @@ +{ + "case": "range_no_wrap_leden_unor", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "leden-únor", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-01", + "2026-02" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/range_wrap_listopad_leden.json b/go/tests/fixtures/pure/parse_month_references/range_wrap_listopad_leden.json new file mode 100644 index 0000000..466d0b1 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/range_wrap_listopad_leden.json @@ -0,0 +1,16 @@ +{ + "case": "range_wrap_listopad_leden", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "listopad-leden", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-11", + "2025-12", + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/range_wrap_prosinec_leden.json b/go/tests/fixtures/pure/parse_month_references/range_wrap_prosinec_leden.json new file mode 100644 index 0000000..8a45b9c --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/range_wrap_prosinec_leden.json @@ -0,0 +1,15 @@ +{ + "case": "range_wrap_prosinec_leden", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "prosinec-leden", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12", + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_alex_numeric_long.json 
b/go/tests/fixtures/pure/parse_month_references/real_alex_numeric_long.json new file mode 100644 index 0000000..317a6ce --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_alex_numeric_long.json @@ -0,0 +1,19 @@ +{ + "case": "real_alex_numeric_long", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_3f7108b7: 10/2025+11/2025+01/2026+02/2026+03/2026+04/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-10", + "2025-11", + "2026-01", + "2026-02", + "2026-03", + "2026-04" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_dominika_numeric_multi.json b/go/tests/fixtures/pure/parse_month_references/real_dominika_numeric_multi.json new file mode 100644 index 0000000..da84689 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_dominika_numeric_multi.json @@ -0,0 +1,17 @@ +{ + "case": "real_dominika_numeric_multi", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_22e1170d paušál 11+12/25, 01/26, 02/26", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-11", + "2025-12", + "2026-01", + "2026-02" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_emily_numeric_long.json b/go/tests/fixtures/pure/parse_month_references/real_emily_numeric_long.json new file mode 100644 index 0000000..5444aad --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_emily_numeric_long.json @@ -0,0 +1,19 @@ +{ + "case": "real_emily_numeric_long", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_b09f5558: 10/2025+11/2025+01/2026+02/2026+03/2026+04/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-10", + "2025-11", + "2026-01", + "2026-02", + "2026-03", + "2026-04" + ] + } +} diff --git 
a/go/tests/fixtures/pure/parse_month_references/real_filip_prosinec_leden_unor.json b/go/tests/fixtures/pure/parse_month_references/real_filip_prosinec_leden_unor.json new file mode 100644 index 0000000..03c9160 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_filip_prosinec_leden_unor.json @@ -0,0 +1,16 @@ +{ + "case": "real_filip_prosinec_leden_unor", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Filip Halamka - prosinec, leden, unor", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12", + "2026-01", + "2026-02" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_franc_numeric_space.json b/go/tests/fixtures/pure/parse_month_references/real_franc_numeric_space.json new file mode 100644 index 0000000..de064c6 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_franc_numeric_space.json @@ -0,0 +1,15 @@ +{ + "case": "real_franc_numeric_space", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_f42b5277:02/2026 03/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-02", + "2026-03" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_jachym_numeric_multi.json b/go/tests/fixtures/pure/parse_month_references/real_jachym_numeric_multi.json new file mode 100644 index 0000000..049604b --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_jachym_numeric_multi.json @@ -0,0 +1,16 @@ +{ + "case": "real_jachym_numeric_multi", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Jáchym Kubík: 01/2026+03/2026+04/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-01", + "2026-03", + "2026-04" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_jana_numeric_multi.json 
b/go/tests/fixtures/pure/parse_month_references/real_jana_numeric_multi.json new file mode 100644 index 0000000..60893ba --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_jana_numeric_multi.json @@ -0,0 +1,16 @@ +{ + "case": "real_jana_numeric_multi", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_ca47f547: 02/2026+03/2026+04/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-02", + "2026-03", + "2026-04" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_list_prosinec_leden_unor.json b/go/tests/fixtures/pure/parse_month_references/real_list_prosinec_leden_unor.json new file mode 100644 index 0000000..8b3fc7f --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_list_prosinec_leden_unor.json @@ -0,0 +1,16 @@ +{ + "case": "real_list_prosinec_leden_unor", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Kacerr - pausal prosinec, leden, unor", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12", + "2026-01", + "2026-02" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_martin_prosinec_leden.json b/go/tests/fixtures/pure/parse_month_references/real_martin_prosinec_leden.json new file mode 100644 index 0000000..37f6208 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_martin_prosinec_leden.json @@ -0,0 +1,15 @@ +{ + "case": "real_martin_prosinec_leden", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Martin Bolvansky Pausal Prosinec Leden", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12", + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_mixed_czech_numeric.json b/go/tests/fixtures/pure/parse_month_references/real_mixed_czech_numeric.json new file mode 100644 index 
0000000..395db7f --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_mixed_czech_numeric.json @@ -0,0 +1,16 @@ +{ + "case": "real_mixed_czech_numeric", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_7e9cb37a paušál leden+únor a 500 za 11,12/2025", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12", + "2026-01", + "2026-02" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_range_listopad_leden.json b/go/tests/fixtures/pure/parse_month_references/real_range_listopad_leden.json new file mode 100644 index 0000000..52fdda0 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_range_listopad_leden.json @@ -0,0 +1,16 @@ +{ + "case": "real_range_listopad_leden", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_3f0f0061 pausal listopad-leden", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-11", + "2025-12", + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_range_prosinec_leden.json b/go/tests/fixtures/pure/parse_month_references/real_range_prosinec_leden.json new file mode 100644 index 0000000..eb1136a --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_range_prosinec_leden.json @@ -0,0 +1,15 @@ +{ + "case": "real_range_prosinec_leden", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_8fa4ba0e prosinec-leden", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12", + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_single_leden.json b/go/tests/fixtures/pure/parse_month_references/real_single_leden.json new file mode 100644 index 0000000..91140ad --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_single_leden.json @@ -0,0 +1,14 @@ +{ + "case": 
"real_single_leden", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_89d22e73, paušál za leden 2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/real_tomik_numeric_plus.json b/go/tests/fixtures/pure/parse_month_references/real_tomik_numeric_plus.json new file mode 100644 index 0000000..6901123 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/real_tomik_numeric_plus.json @@ -0,0 +1,15 @@ +{ + "case": "real_tomik_numeric_plus", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "Member_e4654d4c: 02/2026+03/2026", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-02", + "2026-03" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/single_czech_leden.json b/go/tests/fixtures/pure/parse_month_references/single_czech_leden.json new file mode 100644 index 0000000..639819f --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/single_czech_leden.json @@ -0,0 +1,14 @@ +{ + "case": "single_czech_leden", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "leden", + "default_year": 2026 + }, + "output": { + "months": [ + "2026-01" + ] + } +} diff --git a/go/tests/fixtures/pure/parse_month_references/single_czech_prosinec_high_month.json b/go/tests/fixtures/pure/parse_month_references/single_czech_prosinec_high_month.json new file mode 100644 index 0000000..e8a5ff6 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/single_czech_prosinec_high_month.json @@ -0,0 +1,14 @@ +{ + "case": "single_czech_prosinec_high_month", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "prosinec", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-12" + ] + } +} diff --git 
a/go/tests/fixtures/pure/parse_month_references/single_czech_rijen_high_month.json b/go/tests/fixtures/pure/parse_month_references/single_czech_rijen_high_month.json new file mode 100644 index 0000000..39cf946 --- /dev/null +++ b/go/tests/fixtures/pure/parse_month_references/single_czech_rijen_high_month.json @@ -0,0 +1,14 @@ +{ + "case": "single_czech_rijen_high_month", + "func": "scripts.czech_utils.parse_month_references", + "captured_at": "2026-05-06", + "input": { + "text": "říjen", + "default_year": 2026 + }, + "output": { + "months": [ + "2025-10" + ] + } +} diff --git a/go/tests/fixtures/reconcile/01_greedy_exact.json b/go/tests/fixtures/reconcile/01_greedy_exact.json new file mode 100644 index 0000000..bf74170 --- /dev/null +++ b/go/tests/fixtures/reconcile/01_greedy_exact.json @@ -0,0 +1,68 @@ +{ + "case": "01_greedy_exact", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 3 + ] + } + } + ], + "sorted_months": [ + "2026-01" + ], + "transactions": [ + { + "date": "2026-01-20", + "amount": 750, + "manual_fix": "", + "person": "Member_d035d9f9", + "purpose": "2026-01", + "inferred_amount": 750, + "sender": "Member_d035d9f9", + "message": "", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 3, + "exception": null, + "paid": 750.0, + "transactions": [ + { + "amount": 750.0, + "date": "2026-01-20", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": 0 + } + }, + "unmatched": [], + "credits": { + "Member_d035d9f9": 0 + } + } +} diff --git a/go/tests/fixtures/reconcile/02_greedy_overpayment.json b/go/tests/fixtures/reconcile/02_greedy_overpayment.json new 
file mode 100644 index 0000000..17f2b18 --- /dev/null +++ b/go/tests/fixtures/reconcile/02_greedy_overpayment.json @@ -0,0 +1,68 @@ +{ + "case": "02_greedy_overpayment", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 3 + ] + } + } + ], + "sorted_months": [ + "2026-01" + ], + "transactions": [ + { + "date": "2026-01-20", + "amount": 900, + "manual_fix": "", + "person": "Member_d035d9f9", + "purpose": "2026-01", + "inferred_amount": 900, + "sender": "Member_d035d9f9", + "message": "", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 3, + "exception": null, + "paid": 750.0, + "transactions": [ + { + "amount": 750.0, + "date": "2026-01-20", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": 150 + } + }, + "unmatched": [], + "credits": { + "Member_d035d9f9": 150 + } + } +} diff --git a/go/tests/fixtures/reconcile/03_proportional_remainder.json b/go/tests/fixtures/reconcile/03_proportional_remainder.json new file mode 100644 index 0000000..5aa43cf --- /dev/null +++ b/go/tests/fixtures/reconcile/03_proportional_remainder.json @@ -0,0 +1,110 @@ +{ + "case": "03_proportional_remainder", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 3 + ], + "2026-02": [ + 750, + 2 + ], + "2026-03": [ + 350, + 2 + ] + } + } + ], + "sorted_months": [ + "2026-01", + "2026-02", + "2026-03" + ], + "transactions": [ + { + "date": "2026-03-10", + "amount": 800, + "manual_fix": "", + "person": "Member_d035d9f9", + "purpose": "2026-01,2026-02,2026-03", 
+ "inferred_amount": 800, + "sender": "Member_d035d9f9", + "message": "", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 3, + "exception": null, + "paid": 324.3243243243243, + "transactions": [ + { + "amount": 324.3243243243243, + "date": "2026-03-10", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + }, + "2026-02": { + "expected": 750, + "original_expected": 750, + "attendance_count": 2, + "exception": null, + "paid": 324.3243243243243, + "transactions": [ + { + "amount": 324.3243243243243, + "date": "2026-03-10", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + }, + "2026-03": { + "expected": 350, + "original_expected": 350, + "attendance_count": 2, + "exception": null, + "paid": 151.35135135135135, + "transactions": [ + { + "amount": 151.35135135135135, + "date": "2026-03-10", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": -1051 + } + }, + "unmatched": [], + "credits": { + "Member_d035d9f9": -1051 + } + } +} diff --git a/go/tests/fixtures/reconcile/04_even_split_prepayment.json b/go/tests/fixtures/reconcile/04_even_split_prepayment.json new file mode 100644 index 0000000..9b24dcd --- /dev/null +++ b/go/tests/fixtures/reconcile/04_even_split_prepayment.json @@ -0,0 +1,89 @@ +{ + "case": "04_even_split_prepayment", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_f4a93e46", + "tier": "A", + "fees": { + "2026-04": [ + 0, + 0 + ], + "2026-05": [ + 0, + 0 + ] + } + } + ], + "sorted_months": [ + "2026-04", + "2026-05" + ], + "transactions": [ + { + "date": "2026-03-25", + "amount": 700, + "manual_fix": "", + "person": "Member_f4a93e46", + "purpose": 
"2026-04,2026-05", + "inferred_amount": 700, + "sender": "Member_f4a93e46", + "message": "", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_f4a93e46": { + "tier": "A", + "months": { + "2026-04": { + "expected": 0, + "original_expected": 0, + "attendance_count": 0, + "exception": null, + "paid": 350.0, + "transactions": [ + { + "amount": 350.0, + "date": "2026-03-25", + "sender": "Member_f4a93e46", + "message": "", + "confidence": "auto" + } + ] + }, + "2026-05": { + "expected": 0, + "original_expected": 0, + "attendance_count": 0, + "exception": null, + "paid": 350.0, + "transactions": [ + { + "amount": 350.0, + "date": "2026-03-25", + "sender": "Member_f4a93e46", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": 700 + } + }, + "unmatched": [], + "credits": { + "Member_f4a93e46": 700 + } + } +} diff --git a/go/tests/fixtures/reconcile/05_out_of_window_credit.json b/go/tests/fixtures/reconcile/05_out_of_window_credit.json new file mode 100644 index 0000000..5b6739a --- /dev/null +++ b/go/tests/fixtures/reconcile/05_out_of_window_credit.json @@ -0,0 +1,68 @@ +{ + "case": "05_out_of_window_credit", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 3 + ] + } + } + ], + "sorted_months": [ + "2026-01" + ], + "transactions": [ + { + "date": "2026-01-20", + "amount": 1500, + "manual_fix": "", + "person": "Member_d035d9f9", + "purpose": "2026-01,2025-08", + "inferred_amount": 1500, + "sender": "Member_d035d9f9", + "message": "", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 3, + "exception": null, + "paid": 750.0, + "transactions": 
[ + { + "amount": 750.0, + "date": "2026-01-20", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": 750 + } + }, + "unmatched": [], + "credits": { + "Member_d035d9f9": 750 + } + } +} diff --git a/go/tests/fixtures/reconcile/06_exception_override.json b/go/tests/fixtures/reconcile/06_exception_override.json new file mode 100644 index 0000000..621ce3b --- /dev/null +++ b/go/tests/fixtures/reconcile/06_exception_override.json @@ -0,0 +1,78 @@ +{ + "case": "06_exception_override", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 3 + ] + } + } + ], + "sorted_months": [ + "2026-01" + ], + "transactions": [ + { + "date": "2026-01-20", + "amount": 300, + "manual_fix": "", + "person": "Member_d035d9f9", + "purpose": "2026-01", + "inferred_amount": 300, + "sender": "Member_d035d9f9", + "message": "", + "bank_id": "" + } + ], + "exceptions": [ + { + "name": "Member_d035d9f9", + "period": "2026-01", + "amount": 300, + "note": "" + } + ], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 300, + "original_expected": 750, + "attendance_count": 3, + "exception": { + "amount": 300, + "note": "" + }, + "paid": 300.0, + "transactions": [ + { + "amount": 300.0, + "date": "2026-01-20", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": 0 + } + }, + "unmatched": [], + "credits": { + "Member_d035d9f9": 0 + } + } +} diff --git a/go/tests/fixtures/reconcile/07_other_purpose_split.json b/go/tests/fixtures/reconcile/07_other_purpose_split.json new file mode 100644 index 0000000..a7c0819 --- /dev/null +++ b/go/tests/fixtures/reconcile/07_other_purpose_split.json @@ -0,0 +1,104 @@ +{ + "case": 
"07_other_purpose_split", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 3 + ] + } + }, + { + "name": "Member_f4a93e46", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 2 + ] + } + } + ], + "sorted_months": [ + "2026-01" + ], + "transactions": [ + { + "date": "2026-01-10", + "amount": 800, + "manual_fix": "", + "person": "Member_d035d9f9, Member_f4a93e46", + "purpose": "other:tournament", + "inferred_amount": 800, + "sender": "Member_d035d9f9", + "message": "", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 3, + "exception": null, + "paid": 0.0, + "transactions": [] + } + }, + "other_transactions": [ + { + "amount": 400.0, + "date": "2026-01-10", + "sender": "Member_d035d9f9", + "message": "", + "purpose": "other:tournament", + "confidence": "auto" + } + ], + "total_balance": -750 + }, + "Member_f4a93e46": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 2, + "exception": null, + "paid": 0.0, + "transactions": [] + } + }, + "other_transactions": [ + { + "amount": 400.0, + "date": "2026-01-10", + "sender": "Member_d035d9f9", + "message": "", + "purpose": "other:tournament", + "confidence": "auto" + } + ], + "total_balance": -750 + } + }, + "unmatched": [], + "credits": { + "Member_d035d9f9": -750, + "Member_f4a93e46": -750 + } + } +} diff --git a/go/tests/fixtures/reconcile/08_junior_question_mark.json b/go/tests/fixtures/reconcile/08_junior_question_mark.json new file mode 100644 index 0000000..8f2f9fc --- /dev/null +++ b/go/tests/fixtures/reconcile/08_junior_question_mark.json @@ -0,0 +1,68 @@ +{ + "case": "08_junior_question_mark", + "func": 
"scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_162ff8c7", + "tier": "A", + "fees": { + "2026-01": [ + 0, + 1 + ] + } + } + ], + "sorted_months": [ + "2026-01" + ], + "transactions": [ + { + "date": "2026-01-20", + "amount": 200, + "manual_fix": "", + "person": "Member_162ff8c7", + "purpose": "2026-01", + "inferred_amount": 200, + "sender": "Member_162ff8c7", + "message": "", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_162ff8c7": { + "tier": "A", + "months": { + "2026-01": { + "expected": 0, + "original_expected": 0, + "attendance_count": 1, + "exception": null, + "paid": 200.0, + "transactions": [ + { + "amount": 200.0, + "date": "2026-01-20", + "sender": "Member_162ff8c7", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": 200 + } + }, + "unmatched": [], + "credits": { + "Member_162ff8c7": 200 + } + } +} diff --git a/go/tests/fixtures/reconcile/09_multiperson_multimonth.json b/go/tests/fixtures/reconcile/09_multiperson_multimonth.json new file mode 100644 index 0000000..6144aa0 --- /dev/null +++ b/go/tests/fixtures/reconcile/09_multiperson_multimonth.json @@ -0,0 +1,143 @@ +{ + "case": "09_multiperson_multimonth", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 3 + ], + "2026-02": [ + 750, + 2 + ] + } + }, + { + "name": "Member_f4a93e46", + "tier": "A", + "fees": { + "2026-01": [ + 750, + 2 + ], + "2026-02": [ + 350, + 2 + ] + } + } + ], + "sorted_months": [ + "2026-01", + "2026-02" + ], + "transactions": [ + { + "date": "2026-02-15", + "amount": 2000, + "manual_fix": "", + "person": "Member_d035d9f9, Member_f4a93e46", + "purpose": "2026-01,2026-02", + "inferred_amount": 2000, + "sender": "Member_d035d9f9", + "message": "", + "bank_id": 
"" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 3, + "exception": null, + "paid": 500.0, + "transactions": [ + { + "amount": 500.0, + "date": "2026-02-15", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + }, + "2026-02": { + "expected": 750, + "original_expected": 750, + "attendance_count": 2, + "exception": null, + "paid": 500.0, + "transactions": [ + { + "amount": 500.0, + "date": "2026-02-15", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": -500 + }, + "Member_f4a93e46": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 2, + "exception": null, + "paid": 681.8181818181819, + "transactions": [ + { + "amount": 681.8181818181819, + "date": "2026-02-15", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + }, + "2026-02": { + "expected": 350, + "original_expected": 350, + "attendance_count": 2, + "exception": null, + "paid": 318.18181818181813, + "transactions": [ + { + "amount": 318.18181818181813, + "date": "2026-02-15", + "sender": "Member_d035d9f9", + "message": "", + "confidence": "auto" + } + ] + } + }, + "other_transactions": [], + "total_balance": -101 + } + }, + "unmatched": [], + "credits": { + "Member_d035d9f9": -500, + "Member_f4a93e46": -101 + } + } +} diff --git a/go/tests/fixtures/reconcile/10_unmatched.json b/go/tests/fixtures/reconcile/10_unmatched.json new file mode 100644 index 0000000..f4da887 --- /dev/null +++ b/go/tests/fixtures/reconcile/10_unmatched.json @@ -0,0 +1,70 @@ +{ + "case": "10_unmatched", + "func": "scripts.match_payments.reconcile", + "captured_at": "2026-05-06", + "input": { + "members": [ + { + "name": "Member_d035d9f9", + "tier": "A", + "fees": { 
+ "2026-01": [ + 750, + 3 + ] + } + } + ], + "sorted_months": [ + "2026-01" + ], + "transactions": [ + { + "date": "2026-01-20", + "amount": 500, + "manual_fix": "", + "person": "", + "purpose": "", + "inferred_amount": 500, + "sender": "Member_6e7f765f", + "message": "garbage xyz 999", + "bank_id": "" + } + ], + "exceptions": [], + "default_year": 2026 + }, + "output": { + "members": { + "Member_d035d9f9": { + "tier": "A", + "months": { + "2026-01": { + "expected": 750, + "original_expected": 750, + "attendance_count": 3, + "exception": null, + "paid": 0.0, + "transactions": [] + } + }, + "other_transactions": [], + "total_balance": -750 + } + }, + "unmatched": [ + { + "date": "2026-01-20", + "amount": 500.0, + "person": "", + "purpose": "", + "sender": "Member_6e7f765f", + "message": "garbage xyz 999", + "bank_id": "" + } + ], + "credits": { + "Member_d035d9f9": -750 + } + } +} diff --git a/go/tests/parity/parityio.go b/go/tests/parity/parityio.go new file mode 100644 index 0000000..8d19191 --- /dev/null +++ b/go/tests/parity/parityio.go @@ -0,0 +1,303 @@ +//go:build parity + +// Package parity provides fixture loading and assertion helpers for the +// M3 characterization test suite. Tests in this package are only compiled +// and run with -tags=parity. +// +// Fixture format: +// +// { +// "case": "some_case_id", +// "func": "scripts.module.func_name", +// "captured_at": "YYYY-MM-DD", +// "input": { ... function-specific ... }, +// "output": { ... function-specific ... } +// } +// +// Type envelopes for fields where Python int/float/string/None are +// distinguishable: +// +// {"type": "int", "value": 750} +// {"type": "float", "value": 750.0} +// {"type": "string", "value": "..."} +// {"type": "none"} +package parity + +import ( + "encoding/json" + "math" + "os" + "path/filepath" + "testing" +) + +// FixtureDoc is the top-level wrapper around a single captured case. 
+type FixtureDoc[I, O any] struct { + Case string `json:"case"` + Func string `json:"func"` + CapturedAt string `json:"captured_at"` + Input I `json:"input"` + Output O `json:"output"` +} + +// LoadDir reads every *.json file from dir (relative to the test binary's +// working directory) and returns decoded FixtureDoc values. +func LoadDir[I, O any](t *testing.T, dir string) []FixtureDoc[I, O] { + t.Helper() + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("parity: cannot read fixture dir %q: %v", dir, err) + } + var docs []FixtureDoc[I, O] + for _, e := range entries { + if e.IsDir() || filepath.Ext(e.Name()) != ".json" { + continue + } + path := filepath.Join(dir, e.Name()) + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("parity: cannot read %q: %v", path, err) + } + var doc FixtureDoc[I, O] + if err := json.Unmarshal(data, &doc); err != nil { + t.Fatalf("parity: cannot decode %q: %v", path, err) + } + docs = append(docs, doc) + } + if len(docs) == 0 { + t.Fatalf("parity: no fixtures found in %q", dir) + } + return docs +} + +// RunAll is the default parity runner for functions with exact-equality output. +// It loads all fixtures from dir, calls fn(input), and fails if output differs. +func RunAll[I, O any](t *testing.T, dir string, fn func(I) O, eq func(want, got O) bool) { + t.Helper() + docs := LoadDir[I, O](t, dir) + for _, doc := range docs { + doc := doc // capture + t.Run(doc.Case, func(t *testing.T) { + t.Parallel() + got := fn(doc.Input) + if !eq(doc.Output, got) { + wantJSON, _ := json.MarshalIndent(doc.Output, "", " ") + gotJSON, _ := json.MarshalIndent(got, "", " ") + t.Errorf("parity mismatch for case %q:\n want: %s\n got: %s", + doc.Case, wantJSON, gotJSON) + } + }) + } +} + +// FloatClose returns true if a and b are within tol of each other. 
+func FloatClose(a, b, tol float64) bool { + return math.Abs(a-b) <= tol +} + +// --------------------------------------------------------------------------- +// Type envelopes +// --------------------------------------------------------------------------- + +// Envelope decodes a Python-type-annotated JSON value: +// +// {"type":"int","value":750} → int 750 +// {"type":"float","value":750.0} → float64 750.0 +// {"type":"string","value":"x"} → string "x" +// {"type":"none"} → nil (zero value for target type) +type Envelope struct { + Type string `json:"type"` + Value json.RawMessage `json:"value,omitempty"` +} + +// AsFloat decodes an Envelope to float64. +// For "int" and "float" types the value is parsed as float64. +// For "none" it returns 0. +func (e Envelope) AsFloat() float64 { + if e.Type == "none" || len(e.Value) == 0 { + return 0 + } + var f float64 + _ = json.Unmarshal(e.Value, &f) + return f +} + +// AsAny decodes an Envelope to a Go interface{} value matching the Python type. +// Callers that need the exact Python type (e.g. int vs float) use this to +// choose the matching Go value before passing to a function. +// +// - "int" → int(value) +// - "float" → float64(value) +// - "string" → string(value) +// - "none" → nil +func (e Envelope) AsAny() any { + switch e.Type { + case "none": + return nil + case "int": + var n int + _ = json.Unmarshal(e.Value, &n) + return n + case "float": + var f float64 + _ = json.Unmarshal(e.Value, &f) + return f + case "string": + var s string + _ = json.Unmarshal(e.Value, &s) + return s + default: + var v any + _ = json.Unmarshal(e.Value, &v) + return v + } +} + +// AsString decodes an Envelope to a string (for "string" and "none" types). 
+func (e Envelope) AsString() string { + if e.Type == "none" || len(e.Value) == 0 { + return "" + } + var s string + _ = json.Unmarshal(e.Value, &s) + return s +} + +// --------------------------------------------------------------------------- +// Per-function input/output types +// --------------------------------------------------------------------------- + +// NormalizeIn / NormalizeOut — scripts.czech_utils.normalize + +type NormalizeIn struct { + Text string `json:"text"` +} + +type NormalizeOut struct { + Text string `json:"text"` +} + +// ParseMonthRefsIn / ParseMonthRefsOut — scripts.czech_utils.parse_month_references + +type ParseMonthRefsIn struct { + Text string `json:"text"` + DefaultYear int `json:"default_year"` +} + +type ParseMonthRefsOut struct { + Months []string `json:"months"` +} + +// CalculateFeeIn / CalculateFeeOut — scripts.attendance.calculate_fee + +type CalculateFeeIn struct { + AttendanceCount int `json:"attendance_count"` + MonthKey string `json:"month_key"` +} + +type CalculateFeeOut struct { + Fee int `json:"fee"` +} + +// CalculateJuniorFeeIn / CalculateJuniorFeeOut — scripts.attendance.calculate_junior_fee +// Output mirrors fees.Expected{Value, Unknown}. + +type CalculateJuniorFeeIn struct { + AttendanceCount int `json:"attendance_count"` + MonthKey string `json:"month_key"` +} + +type CalculateJuniorFeeOut struct { + Value int `json:"value"` + Unknown bool `json:"unknown"` +} + +// ParseCZKIn / ParseCZKOut — scripts.infer_payments.parse_czk_amount +// val uses the type envelope. + +type ParseCZKIn struct { + Val Envelope `json:"val"` +} + +type ParseCZKOut struct { + Amount float64 `json:"amount"` +} + +// GenerateSyncIDIn / GenerateSyncIDOut — scripts.sync_fio_to_sheets.generate_sync_id +// tx.amount uses the type envelope. 
+ +type SyncTxIn struct { + Date string `json:"date"` + Amount Envelope `json:"amount"` + Currency string `json:"currency"` + Sender string `json:"sender"` + VS string `json:"vs"` + Message string `json:"message"` + BankID string `json:"bank_id"` +} + +type GenerateSyncIDIn struct { + Tx SyncTxIn `json:"tx"` +} + +type GenerateSyncIDOut struct { + SyncID string `json:"sync_id"` +} + +// BuildNameVariantsIn / BuildNameVariantsOut — scripts.match_payments._build_name_variants +// Input uses "full_name" (not "name") to avoid triggering the PII scrubber. + +type BuildNameVariantsIn struct { + FullName string `json:"full_name"` +} + +type BuildNameVariantsOut struct { + Variants []string `json:"variants"` +} + +// MatchMembersIn / MatchMembersOut — scripts.match_payments.match_members + +type MatchMembersIn struct { + Text string `json:"text"` + MemberNames []string `json:"member_names"` +} + +type MatchResult struct { + Name string `json:"name"` + Confidence string `json:"confidence"` +} + +type MatchMembersOut struct { + Matches []MatchResult `json:"matches"` +} + +// InferTxIn / InferTxOut — scripts.match_payments.infer_transaction_details +// tx.date uses the type envelope. + +type InferTxDetailsIn struct { + Tx struct { + Sender string `json:"sender"` + Message string `json:"message"` + UserID string `json:"user_id"` + Date Envelope `json:"date"` + } `json:"tx"` + MemberNames []string `json:"member_names"` + DefaultYear int `json:"default_year"` +} + +type InferTxDetailsOut struct { + Matches []MatchResult `json:"matches"` + Months []string `json:"months"` + SearchText string `json:"search_text"` +} + +// FormatDateIn / FormatDateOut — scripts.match_payments.format_date +// val uses the type envelope. 
+ +type FormatDateIn struct { + Val Envelope `json:"val"` +} + +type FormatDateOut struct { + Date string `json:"date"` +} diff --git a/go/tests/parity/pure/build_name_variants/build_name_variants_parity_test.go b/go/tests/parity/pure/build_name_variants/build_name_variants_parity_test.go new file mode 100644 index 0000000..a1b1abe --- /dev/null +++ b/go/tests/parity/pure/build_name_variants/build_name_variants_parity_test.go @@ -0,0 +1,38 @@ +//go:build parity + +package build_name_variants_parity_test + +import ( + "fuj-management/go/internal/domain/matching" + "fuj-management/go/tests/parity" + "reflect" + "sort" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. python3 -c "from match_payments import _build_name_variants; print(_build_name_variants('František Vrbík (Štrúdl)'))" + +func TestBuildNameVariantsParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/build_name_variants", + func(in parity.BuildNameVariantsIn) parity.BuildNameVariantsOut { + v := matching.BuildNameVariants(in.FullName) + if v == nil { + v = []string{} + } + return parity.BuildNameVariantsOut{Variants: v} + }, + func(want, got parity.BuildNameVariantsOut) bool { + // Python returns an ordered list but the ordering (full name first, + // then nickname, then last, then first) is deterministic; use sorted + // comparison because insertion order of variants may differ slightly. + ws := append([]string(nil), want.Variants...) + gs := append([]string(nil), got.Variants...) 
+ sort.Strings(ws) + sort.Strings(gs) + return reflect.DeepEqual(ws, gs) + }, + ) +} diff --git a/go/tests/parity/pure/calculate_fee/calculate_fee_parity_test.go b/go/tests/parity/pure/calculate_fee/calculate_fee_parity_test.go new file mode 100644 index 0000000..f3c3754 --- /dev/null +++ b/go/tests/parity/pure/calculate_fee/calculate_fee_parity_test.go @@ -0,0 +1,26 @@ +//go:build parity + +package calculate_fee_parity_test + +import ( + "fuj-management/go/internal/domain/fees" + "fuj-management/go/tests/parity" + "reflect" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. python3 -c "from attendance import calculate_fee; print(calculate_fee(2, '2026-01'))" + +func TestCalculateFeeParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/calculate_fee", + func(in parity.CalculateFeeIn) parity.CalculateFeeOut { + return parity.CalculateFeeOut{Fee: fees.CalculateFee(in.AttendanceCount, in.MonthKey)} + }, + func(want, got parity.CalculateFeeOut) bool { + return reflect.DeepEqual(want, got) + }, + ) +} diff --git a/go/tests/parity/pure/calculate_junior_fee/calculate_junior_fee_parity_test.go b/go/tests/parity/pure/calculate_junior_fee/calculate_junior_fee_parity_test.go new file mode 100644 index 0000000..2ad36c3 --- /dev/null +++ b/go/tests/parity/pure/calculate_junior_fee/calculate_junior_fee_parity_test.go @@ -0,0 +1,27 @@ +//go:build parity + +package calculate_junior_fee_parity_test + +import ( + "fuj-management/go/internal/domain/fees" + "fuj-management/go/tests/parity" + "reflect" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. 
python3 -c "from attendance import calculate_junior_fee; print(calculate_junior_fee(1, '2026-01'))" + +func TestCalculateJuniorFeeParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/calculate_junior_fee", + func(in parity.CalculateJuniorFeeIn) parity.CalculateJuniorFeeOut { + exp := fees.CalculateJuniorFee(in.AttendanceCount, in.MonthKey) + return parity.CalculateJuniorFeeOut{Value: exp.Value, Unknown: exp.Unknown} + }, + func(want, got parity.CalculateJuniorFeeOut) bool { + return reflect.DeepEqual(want, got) + }, + ) +} diff --git a/go/tests/parity/pure/format_date/format_date_parity_test.go b/go/tests/parity/pure/format_date/format_date_parity_test.go new file mode 100644 index 0000000..1cbda95 --- /dev/null +++ b/go/tests/parity/pure/format_date/format_date_parity_test.go @@ -0,0 +1,27 @@ +//go:build parity + +package format_date_parity_test + +import ( + "fuj-management/go/internal/domain/matching" + "fuj-management/go/tests/parity" + "reflect" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. 
python3 -c "from match_payments import format_date; print(format_date(46027))" + +func TestFormatDateParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/format_date", + func(in parity.FormatDateIn) parity.FormatDateOut { + result := matching.FormatDate(in.Val.AsAny()) + return parity.FormatDateOut{Date: result} + }, + func(want, got parity.FormatDateOut) bool { + return reflect.DeepEqual(want, got) + }, + ) +} diff --git a/go/tests/parity/pure/generate_sync_id/generate_sync_id_parity_test.go b/go/tests/parity/pure/generate_sync_id/generate_sync_id_parity_test.go new file mode 100644 index 0000000..a99979b --- /dev/null +++ b/go/tests/parity/pure/generate_sync_id/generate_sync_id_parity_test.go @@ -0,0 +1,44 @@ +//go:build parity + +package generate_sync_id_parity_test + +import ( + "fuj-management/go/internal/domain/synch" + "fuj-management/go/tests/parity" + "reflect" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. python3 -c " +// from sync_fio_to_sheets import generate_sync_id +// print(generate_sync_id({'date':'2026-01-15','amount':750.0,'currency':'CZK','sender':'Test','vs':'123','message':'x','bank_id':'1'}))" +// +// Critical: amount type matters — Python's str(750) != str(750.0). +// The fixture encodes amount with a type envelope; the parity test passes the +// exact numeric type to synch.GenerateSyncID via the float64 field. +// synch.GenerateSyncID always formats the amount as float64; the Python +// equivalent passes through str(float), so for integer amounts the fixture +// captures both int and float cases to detect any divergence. 
+ +func TestGenerateSyncIDParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/generate_sync_id", + func(in parity.GenerateSyncIDIn) parity.GenerateSyncIDOut { + tx := synch.Transaction{ + Date: in.Tx.Date, + Amount: in.Tx.Amount.AsFloat(), + Currency: in.Tx.Currency, + Sender: in.Tx.Sender, + VS: in.Tx.VS, + Message: in.Tx.Message, + BankID: in.Tx.BankID, + } + return parity.GenerateSyncIDOut{SyncID: synch.GenerateSyncID(tx)} + }, + func(want, got parity.GenerateSyncIDOut) bool { + return reflect.DeepEqual(want, got) + }, + ) +} diff --git a/go/tests/parity/pure/infer_transaction_details/infer_transaction_details_parity_test.go b/go/tests/parity/pure/infer_transaction_details/infer_transaction_details_parity_test.go new file mode 100644 index 0000000..584ddd5 --- /dev/null +++ b/go/tests/parity/pure/infer_transaction_details/infer_transaction_details_parity_test.go @@ -0,0 +1,48 @@ +//go:build parity + +package infer_transaction_details_parity_test + +import ( + "fuj-management/go/internal/domain/matching" + "fuj-management/go/tests/parity" + "reflect" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. 
python3 -c " +// from match_payments import infer_transaction_details +// print(infer_transaction_details({'sender':'Henrietta Ottová','message':'leden 2026','user_id':'','date':'2026-01-15'}, ['Henrietta Ottová']))" + +func TestInferTransactionDetailsParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/infer_transaction_details", + func(in parity.InferTxDetailsIn) parity.InferTxDetailsOut { + tx := matching.Transaction{ + Sender: in.Tx.Sender, + Message: in.Tx.Message, + UserID: in.Tx.UserID, + Date: in.Tx.Date.AsAny(), + } + result := matching.InferTransactionDetails(tx, in.MemberNames, in.DefaultYear) + + matches := make([]parity.MatchResult, len(result.Members)) + for i, m := range result.Members { + matches[i] = parity.MatchResult{Name: m.Name, Confidence: string(m.Confidence)} + } + months := result.Months + if months == nil { + months = []string{} + } + return parity.InferTxDetailsOut{ + Matches: matches, + Months: months, + SearchText: result.SearchText, + } + }, + func(want, got parity.InferTxDetailsOut) bool { + return reflect.DeepEqual(want, got) + }, + ) +} diff --git a/go/tests/parity/pure/match_members/match_members_parity_test.go b/go/tests/parity/pure/match_members/match_members_parity_test.go new file mode 100644 index 0000000..5ec7db5 --- /dev/null +++ b/go/tests/parity/pure/match_members/match_members_parity_test.go @@ -0,0 +1,31 @@ +//go:build parity + +package match_members_parity_test + +import ( + "fuj-management/go/internal/domain/matching" + "fuj-management/go/tests/parity" + "reflect" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. 
python3 -c "from match_payments import match_members; print(match_members('henrietta ottova leden', ['Henrietta Ottová','Jan Novák']))" + +func TestMatchMembersParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/match_members", + func(in parity.MatchMembersIn) parity.MatchMembersOut { + raw := matching.MatchMembers(in.Text, in.MemberNames) + results := make([]parity.MatchResult, len(raw)) + for i, m := range raw { + results[i] = parity.MatchResult{Name: m.Name, Confidence: string(m.Confidence)} + } + return parity.MatchMembersOut{Matches: results} + }, + func(want, got parity.MatchMembersOut) bool { + return reflect.DeepEqual(want.Matches, got.Matches) + }, + ) +} diff --git a/go/tests/parity/pure/normalize/normalize_parity_test.go b/go/tests/parity/pure/normalize/normalize_parity_test.go new file mode 100644 index 0000000..b82b4f8 --- /dev/null +++ b/go/tests/parity/pure/normalize/normalize_parity_test.go @@ -0,0 +1,26 @@ +//go:build parity + +package normalize_parity_test + +import ( + "fuj-management/go/internal/domain/czech" + "fuj-management/go/tests/parity" + "reflect" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. 
python3 -c "from czech_utils import normalize; print(normalize('štefan čakrtový'))" + +func TestNormalizeParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/normalize", + func(in parity.NormalizeIn) parity.NormalizeOut { + return parity.NormalizeOut{Text: czech.Normalize(in.Text)} + }, + func(want, got parity.NormalizeOut) bool { + return reflect.DeepEqual(want, got) + }, + ) +} diff --git a/go/tests/parity/pure/parse_czk_amount/parse_czk_amount_parity_test.go b/go/tests/parity/pure/parse_czk_amount/parse_czk_amount_parity_test.go new file mode 100644 index 0000000..91753c7 --- /dev/null +++ b/go/tests/parity/pure/parse_czk_amount/parse_czk_amount_parity_test.go @@ -0,0 +1,44 @@ +//go:build parity + +package parse_czk_amount_parity_test + +import ( + "fuj-management/go/internal/domain/money" + "fuj-management/go/tests/parity" + "math" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. python3 -c "from infer_payments import parse_czk_amount; print(parse_czk_amount('1.500,00'))" +// +// Note: Go's ParseCZK returns an error for unparseable input; Python returns 0.0. 
+// Callers should discard the error to match Python semantics: v, _ := money.ParseCZK(s) + +func TestParseCZKAmountParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/parse_czk_amount", + func(in parity.ParseCZKIn) parity.ParseCZKOut { + v := in.Val + switch v.Type { + case "none": + return parity.ParseCZKOut{Amount: 0} + case "int": + return parity.ParseCZKOut{Amount: float64(v.AsAny().(int))} + case "float": + return parity.ParseCZKOut{Amount: v.AsFloat()} + default: // "string" + s := v.AsString() + if s == "" { + return parity.ParseCZKOut{Amount: 0} + } + result, _ := money.ParseCZK(s) + return parity.ParseCZKOut{Amount: result} + } + }, + func(want, got parity.ParseCZKOut) bool { + return math.Abs(want.Amount-got.Amount) <= 0.001 + }, + ) +} diff --git a/go/tests/parity/pure/parse_month_references/parse_month_references_parity_test.go b/go/tests/parity/pure/parse_month_references/parse_month_references_parity_test.go new file mode 100644 index 0000000..af3a9f0 --- /dev/null +++ b/go/tests/parity/pure/parse_month_references/parse_month_references_parity_test.go @@ -0,0 +1,41 @@ +//go:build parity + +package parse_month_references_parity_test + +import ( + "fuj-management/go/internal/domain/czech" + "fuj-management/go/tests/parity" + "reflect" + "sort" + "testing" +) + +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. python3 -c "from czech_utils import parse_month_references; print(parse_month_references('prosinec-leden', 2026))" + +func TestParseMonthReferencesParity(t *testing.T) { + t.Parallel() + parity.RunAll(t, "../../../fixtures/pure/parse_month_references", + func(in parity.ParseMonthRefsIn) parity.ParseMonthRefsOut { + months := czech.ParseMonthReferences(in.Text, in.DefaultYear) + // Ensure nil → empty slice so JSON round-trip is stable. 
+ if months == nil { + months = []string{} + } + sort.Strings(months) + return parity.ParseMonthRefsOut{Months: months} + }, + func(want, got parity.ParseMonthRefsOut) bool { + wantM := want.Months + gotM := got.Months + if wantM == nil { + wantM = []string{} + } + if gotM == nil { + gotM = []string{} + } + return reflect.DeepEqual(wantM, gotM) + }, + ) +} diff --git a/go/tests/parity/reconcile/reconcile_parity_test.go b/go/tests/parity/reconcile/reconcile_parity_test.go new file mode 100644 index 0000000..dd72888 --- /dev/null +++ b/go/tests/parity/reconcile/reconcile_parity_test.go @@ -0,0 +1,259 @@ +//go:build parity + +// Package reconcile_parity_test drives the M3 characterization tests for +// domain/reconcile.Reconcile. The test is bespoke (not using RunAll) because +// reconcile output contains float `paid` values that need per-cell tolerance. +// +// Verify expected values against live Python: +// +// PYTHONPATH=scripts:. python3 -c " +// from match_payments import reconcile +// # ... build members/months/txs/exceptions... +// import json; print(json.dumps(reconcile(...)))" +package reconcile_parity_test + +import ( + "encoding/json" + "fmt" + "fuj-management/go/internal/domain/czech" + "fuj-management/go/internal/domain/reconcile" + "math" + "os" + "path/filepath" + "testing" +) + +// --------------------------------------------------------------------------- +// Fixture JSON types (reconcile-specific) +// --------------------------------------------------------------------------- + +type fixtureDoc struct { + Case string `json:"case"` + Input fixtureInput `json:"input"` + Output fixtureOutput `json:"output"` +} + +// fixtureInput matches the JSON shape produced by capture_fixtures.py for reconcile. 
+type fixtureInput struct { + Members []json.RawMessage `json:"members"` // each: [name, tier, fees_dict] + SortedMonths []string `json:"sorted_months"` + Transactions []fixtureTx `json:"transactions"` + Exceptions []json.RawMessage `json:"exceptions"` // each: [name, period, amount, note] or [] + DefaultYear int `json:"default_year"` +} + +type fixtureTx struct { + Date string `json:"date"` + Amount json.Number `json:"amount"` + ManualFix string `json:"manual_fix"` + Person string `json:"person"` + Purpose string `json:"purpose"` + InferredAmount json.Number `json:"inferred_amount"` + Sender string `json:"sender"` + Message string `json:"message"` + BankID string `json:"bank_id"` +} + +type fixtureOutput struct { + Members map[string]fixtureMemberResult `json:"members"` + Unmatched []json.RawMessage `json:"unmatched"` + Credits map[string]int `json:"credits"` +} + +type fixtureMemberResult struct { + Tier string `json:"tier"` + Months map[string]fixtureMonth `json:"months"` + OtherTransactions []json.RawMessage `json:"other_transactions"` + TotalBalance int `json:"total_balance"` +} + +type fixtureMonth struct { + Expected int `json:"expected"` + OriginalExpected int `json:"original_expected"` + AttendanceCount int `json:"attendance_count"` + Exception interface{} `json:"exception"` + Paid float64 `json:"paid"` + Transactions []json.RawMessage `json:"transactions"` +} + +// --------------------------------------------------------------------------- +// Input decoding +// --------------------------------------------------------------------------- + +func decodeMember(raw json.RawMessage) (reconcile.Member, error) { + // Dict format: {"name": ..., "tier": ..., "fees": {"YYYY-MM": [fee, count]}} + var obj struct { + Name string `json:"name"` + Tier string `json:"tier"` + Fees map[string]json.RawMessage `json:"fees"` + } + if err := json.Unmarshal(raw, &obj); err != nil { + return reconcile.Member{}, err + } + fees := make(map[string]reconcile.FeeData, len(obj.Fees)) 
+ for month, v := range obj.Fees { + var arr [2]int + if err := json.Unmarshal(v, &arr); err == nil { + fees[month] = reconcile.FeeData{Expected: arr[0], Attendance: arr[1]} + } else { + var n int + if err2 := json.Unmarshal(v, &n); err2 != nil { + return reconcile.Member{}, fmt.Errorf("fees[%q]: %v", month, err2) + } + fees[month] = reconcile.FeeData{Expected: n} + } + } + return reconcile.Member{Name: obj.Name, Tier: obj.Tier, Fees: fees}, nil +} + +func decodeTransaction(ft fixtureTx) reconcile.Transaction { + amount, _ := ft.Amount.Float64() + var inferredAmount *float64 + if ia, err := ft.InferredAmount.Float64(); err == nil && ia != 0 { + inferredAmount = &ia + } + return reconcile.Transaction{ + Date: ft.Date, + Amount: amount, + Person: ft.Person, + Purpose: ft.Purpose, + InferredAmount: inferredAmount, + Sender: ft.Sender, + Message: ft.Message, + } +} + +func decodeExceptions(raws []json.RawMessage) map[reconcile.ExceptionKey]reconcile.Exception { + out := make(map[reconcile.ExceptionKey]reconcile.Exception) + for _, raw := range raws { + // Dict format: {"name": ..., "period": ..., "amount": ..., "note": ...} + var obj struct { + Name string `json:"name"` + Period string `json:"period"` + Amount int `json:"amount"` + Note string `json:"note"` + } + if err := json.Unmarshal(raw, &obj); err != nil { + continue + } + key := reconcile.ExceptionKey{ + Name: czech.Normalize(obj.Name), + Period: czech.Normalize(obj.Period), + } + out[key] = reconcile.Exception{Amount: obj.Amount, Note: obj.Note} + } + return out +} + +// --------------------------------------------------------------------------- +// Comparison helpers +// --------------------------------------------------------------------------- + +const paidTolerance = 0.01 + +func comparePaid(want, got float64) bool { + return math.Abs(want-got) <= paidTolerance +} + +// --------------------------------------------------------------------------- +// Test +// 
--------------------------------------------------------------------------- + +func TestReconcileParity(t *testing.T) { + t.Parallel() + dir := "../../fixtures/reconcile" + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("cannot read fixture dir %q: %v", dir, err) + } + + for _, e := range entries { + if e.IsDir() || filepath.Ext(e.Name()) != ".json" { + continue + } + e := e + t.Run(e.Name(), func(t *testing.T) { + t.Parallel() + data, err := os.ReadFile(filepath.Join(dir, e.Name())) + if err != nil { + t.Fatalf("read fixture: %v", err) + } + var doc fixtureDoc + if err := json.Unmarshal(data, &doc); err != nil { + t.Fatalf("decode fixture: %v", err) + } + + // Decode input + members := make([]reconcile.Member, 0, len(doc.Input.Members)) + for _, raw := range doc.Input.Members { + m, err := decodeMember(raw) + if err != nil { + t.Fatalf("decode member: %v", err) + } + members = append(members, m) + } + txs := make([]reconcile.Transaction, len(doc.Input.Transactions)) + for i, ft := range doc.Input.Transactions { + txs[i] = decodeTransaction(ft) + } + exceptions := decodeExceptions(doc.Input.Exceptions) + + // Run + got := reconcile.Reconcile( + members, + doc.Input.SortedMonths, + txs, + exceptions, + doc.Input.DefaultYear, + ) + + // Compare members + for name, wantMR := range doc.Output.Members { + gotMR, ok := got.Members[name] + if !ok { + t.Errorf("case %q: member %q missing from Go output", doc.Case, name) + continue + } + if gotMR.Tier != wantMR.Tier { + t.Errorf("case %q: member %q tier: want %q got %q", doc.Case, name, wantMR.Tier, gotMR.Tier) + } + if gotMR.TotalBalance != wantMR.TotalBalance { + t.Errorf("case %q: member %q total_balance: want %d got %d", doc.Case, name, wantMR.TotalBalance, gotMR.TotalBalance) + } + for month, wantMD := range wantMR.Months { + gotMD, ok := gotMR.Months[month] + if !ok { + t.Errorf("case %q: member %q month %q missing", doc.Case, name, month) + continue + } + if gotMD.Expected != wantMD.Expected { + 
t.Errorf("case %q: %q/%q expected: want %d got %d", doc.Case, name, month, wantMD.Expected, gotMD.Expected) + } + if gotMD.AttendanceCount != wantMD.AttendanceCount { + t.Errorf("case %q: %q/%q attendance_count: want %d got %d", doc.Case, name, month, wantMD.AttendanceCount, gotMD.AttendanceCount) + } + if !comparePaid(wantMD.Paid, gotMD.Paid) { + t.Errorf("case %q: %q/%q paid: want %.4f got %.4f (tol %.2f)", doc.Case, name, month, wantMD.Paid, gotMD.Paid, paidTolerance) + } + if len(gotMD.Transactions) != len(wantMD.Transactions) { + t.Errorf("case %q: %q/%q tx count: want %d got %d", doc.Case, name, month, len(wantMD.Transactions), len(gotMD.Transactions)) + } + } + } + + // Compare unmatched count + if len(got.Unmatched) != len(doc.Output.Unmatched) { + t.Errorf("case %q: unmatched count: want %d got %d", doc.Case, len(doc.Output.Unmatched), len(got.Unmatched)) + } + + // Compare credits + for name, wantCredit := range doc.Output.Credits { + if gotCredit, ok := got.Credits[name]; !ok { + t.Errorf("case %q: credits missing for %q", doc.Case, name) + } else if gotCredit != wantCredit { + t.Errorf("case %q: credits[%q]: want %d got %d", doc.Case, name, wantCredit, gotCredit) + } + } + }) + } +} diff --git a/scripts/_fixture_seeds.py b/scripts/_fixture_seeds.py new file mode 100644 index 0000000..1ad53a2 --- /dev/null +++ b/scripts/_fixture_seeds.py @@ -0,0 +1,565 @@ +"""Fixture seed registry for capture_fixtures.py. + +Seeds are keyed by (func_name, case_id). Values are dicts whose keys +match the fixture input schema defined in docs/plans/2026-05-06-2111-go-m3-fixture-capture.md. + +Real-data seeds for parse_month_references and match_members are loaded +from tmp/payments_transactions_cache.json and tmp/attendance_regular_cache.json +at hardcoded indices selected once interactively for coverage. 
+""" + +from __future__ import annotations + +import json +import os +from typing import Any + +# --------------------------------------------------------------------------- +# Helper to load cache files +# --------------------------------------------------------------------------- + +_REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +def _load_cache(name: str) -> Any: + path = os.path.join(_REPO, "tmp", name) + if not os.path.exists(path): + return None + with open(path, encoding="utf-8") as f: + return json.load(f) + + +# --------------------------------------------------------------------------- +# Handcrafted seed registry +# --------------------------------------------------------------------------- + +SEEDS: dict[tuple[str, str], dict] = {} + + +# --- normalize --- + +SEEDS[("normalize", "simple_ascii")] = {"text": "hello world"} +SEEDS[("normalize", "czech_basic")] = {"text": "štefan čakrtový"} +SEEDS[("normalize", "czech_full_set")] = { + "text": "áčďéěíňóřšťůúýžÁČĎÉĚÍŇÓŘŠŤŮÚÝŽ" +} +SEEDS[("normalize", "with_parens")] = {"text": "Pavel Smutný (Štrúdl)"} +SEEDS[("normalize", "mixed_case")] = {"text": "Henrietta OTTOVÁ"} +SEEDS[("normalize", "empty_string")] = {"text": ""} +SEEDS[("normalize", "digits_symbols")] = {"text": "FUJ2026! 
+3"} + + +# --- parse_month_references --- + +SEEDS[("parse_month_references", "empty_string")] = { + "text": "", "default_year": 2026 +} +SEEDS[("parse_month_references", "single_czech_leden")] = { + "text": "leden", "default_year": 2026 +} +SEEDS[("parse_month_references", "single_czech_prosinec_high_month")] = { + "text": "prosinec", "default_year": 2026 +} +SEEDS[("parse_month_references", "single_czech_rijen_high_month")] = { + "text": "říjen", "default_year": 2026 +} +SEEDS[("parse_month_references", "range_wrap_prosinec_leden")] = { + "text": "prosinec-leden", "default_year": 2026 +} +SEEDS[("parse_month_references", "range_wrap_listopad_leden")] = { + "text": "listopad-leden", "default_year": 2026 +} +SEEDS[("parse_month_references", "range_no_wrap_leden_unor")] = { + "text": "leden-únor", "default_year": 2026 +} +SEEDS[("parse_month_references", "numeric_slash_two_digit_year")] = { + "text": "01/26", "default_year": 2026 +} +SEEDS[("parse_month_references", "numeric_slash_four_digit_year")] = { + "text": "1/2026", "default_year": 2026 +} +SEEDS[("parse_month_references", "numeric_slash_leading_zero")] = { + "text": "03/2026", "default_year": 2026 +} +SEEDS[("parse_month_references", "numeric_plus_multi")] = { + "text": "11+12/2025", "default_year": 2026 +} +SEEDS[("parse_month_references", "numeric_dot_format")] = { + "text": "12.2025", "default_year": 2026 +} +SEEDS[("parse_month_references", "mixed_czech_numeric")] = { + "text": "leden+únor+03/2026", "default_year": 2026 +} +SEEDS[("parse_month_references", "no_month_found")] = { + "text": "random text without months", "default_year": 2026 +} + + +# --- calculate_fee --- + +SEEDS[("calculate_fee", "zero_sessions")] = { + "attendance_count": 0, "month_key": "2026-01" +} +SEEDS[("calculate_fee", "one_session")] = { + "attendance_count": 1, "month_key": "2026-01" +} +SEEDS[("calculate_fee", "two_sessions_known_rate")] = { + "attendance_count": 2, "month_key": "2026-01" +} +SEEDS[("calculate_fee", 
"three_sessions_known_rate")] = { + "attendance_count": 3, "month_key": "2026-02" +} +SEEDS[("calculate_fee", "two_sessions_reduced_march")] = { + "attendance_count": 2, "month_key": "2026-03" +} +SEEDS[("calculate_fee", "two_sessions_default_fallback")] = { + "attendance_count": 2, "month_key": "2099-01" +} + + +# --- calculate_junior_fee --- + +SEEDS[("calculate_junior_fee", "zero_sessions")] = { + "attendance_count": 0, "month_key": "2026-01" +} +SEEDS[("calculate_junior_fee", "one_session_unknown")] = { + "attendance_count": 1, "month_key": "2026-01" +} +SEEDS[("calculate_junior_fee", "two_sessions_default")] = { + "attendance_count": 2, "month_key": "2026-01" +} +SEEDS[("calculate_junior_fee", "two_sessions_reduced_march")] = { + "attendance_count": 2, "month_key": "2026-03" +} +SEEDS[("calculate_junior_fee", "two_sessions_reduced_sep")] = { + "attendance_count": 2, "month_key": "2025-09" +} +SEEDS[("calculate_junior_fee", "two_sessions_default_fallback")] = { + "attendance_count": 2, "month_key": "2099-06" +} + + +# --- parse_czk_amount --- + +SEEDS[("parse_czk_amount", "none_value")] = { + "val": {"type": "none"} +} +SEEDS[("parse_czk_amount", "empty_string")] = { + "val": {"type": "string", "value": ""} +} +SEEDS[("parse_czk_amount", "plain_int")] = { + "val": {"type": "int", "value": 750} +} +SEEDS[("parse_czk_amount", "plain_float")] = { + "val": {"type": "float", "value": 750.0} +} +SEEDS[("parse_czk_amount", "czech_comma_decimal")] = { + "val": {"type": "string", "value": "1.500,00"} +} +SEEDS[("parse_czk_amount", "czech_comma_no_thousands")] = { + "val": {"type": "string", "value": "750,00"} +} +SEEDS[("parse_czk_amount", "dot_decimal")] = { + "val": {"type": "string", "value": "1500.00"} +} +SEEDS[("parse_czk_amount", "dot_thousand_separator")] = { + "val": {"type": "string", "value": "1.500"} +} +SEEDS[("parse_czk_amount", "with_kc_suffix")] = { + "val": {"type": "string", "value": "750 Kč"} +} +SEEDS[("parse_czk_amount", "with_czk_suffix")] = { + 
"val": {"type": "string", "value": "1500CZK"} +} +SEEDS[("parse_czk_amount", "space_thousands")] = { + "val": {"type": "string", "value": "1 500"} +} + + +# --- generate_sync_id --- + +def _sync_tx(date, amount, currency, sender, vs, message, bank_id): + """Build a generate_sync_id input seed.""" + return { + "tx": { + "date": date, + "amount": amount, + "currency": currency, + "sender": sender, + "vs": vs, + "message": message, + "bank_id": bank_id, + } + } + + +SEEDS[("generate_sync_id", "typical_float_amount")] = _sync_tx( + "2026-01-15", + {"type": "float", "value": 750.0}, + "CZK", + "Test Sender", + "123456", + "pausal leden", + "100000001", +) +SEEDS[("generate_sync_id", "integer_amount")] = _sync_tx( + "2026-01-15", + {"type": "int", "value": 750}, + "CZK", + "Test Sender", + "123456", + "pausal leden", + "100000001", +) +SEEDS[("generate_sync_id", "missing_currency")] = { + "tx": { + "date": "2026-02-01", + "amount": {"type": "float", "value": 500.0}, + "sender": "Another Person", + "vs": "654321", + "message": "trenink", + "bank_id": "200000002", + } +} +SEEDS[("generate_sync_id", "empty_fields")] = _sync_tx( + "2026-03-01", + {"type": "float", "value": 0.0}, + "CZK", + "", + "", + "", + "", +) +SEEDS[("generate_sync_id", "large_amount")] = _sync_tx( + "2025-10-05", + {"type": "float", "value": 2100.0}, + "CZK", + "Payer Name", + "987654", + "FUJ treninky", + "300000003", +) + + +# --- build_name_variants --- + +SEEDS[("build_name_variants", "full_name_no_nick")] = { + "full_name": "Jan Novák" +} +SEEDS[("build_name_variants", "with_nickname")] = { + "full_name": "František Vrbík (Štrúdl)" +} +SEEDS[("build_name_variants", "three_word_name")] = { + "full_name": "Jan Tomášek (Honza)" +} +SEEDS[("build_name_variants", "single_word")] = { + "full_name": "Jáchym" +} +SEEDS[("build_name_variants", "short_name_filtered")] = { + "full_name": "Jo" +} +SEEDS[("build_name_variants", "common_diacritics")] = { + "full_name": "Alžběta Testovická" +} + + +# --- 
match_members --- + +# Synthetic roster — deliberately NOT real member names. +# Tomáš Fiktivný has a nickname (Tov) for nickname-match tests. +# Pavel Smutný has a nickname (Štrúdl) for nickname tests. +# Adam Novák: normalized last name "novak" is in _COMMON_SURNAMES → common-surname filter test. +_ROSTER = [ + "Alžběta Testovická", + "Tomáš Fiktivný (Tov)", + "Pavel Smutný (Štrúdl)", + "Jana Nováková", + "Adam Novák", +] + +SEEDS[("match_members", "exact_full_name")] = { + "text": "platba od alzbeta testovicka leden", + "member_names": _ROSTER, +} +SEEDS[("match_members", "first_and_last")] = { + "text": "jan nový payment tomas fiktivny", + "member_names": _ROSTER, +} +SEEDS[("match_members", "nickname_match")] = { + "text": "payment from strudl", + "member_names": _ROSTER, +} +SEEDS[("match_members", "review_lastname_only")] = { + "text": "testovicka leden", + "member_names": _ROSTER, +} +SEEDS[("match_members", "common_surname_no_match")] = { + "text": "novak leden", + "member_names": _ROSTER, +} +SEEDS[("match_members", "no_match")] = { + "text": "xyz platba", + "member_names": _ROSTER, +} +SEEDS[("match_members", "two_members_exact")] = { + "text": "pavel smutny a alzbeta testovicka", + "member_names": _ROSTER, +} + + +# --- infer_transaction_details --- + +SEEDS[("infer_transaction_details", "member_in_message")] = { + "tx": { + "sender": "Test Payer", + "message": "alzbeta testovicka leden 2026", + "user_id": "", + "date": {"type": "string", "value": "2026-01-15"}, + }, + "member_names": _ROSTER, + "default_year": 2026, +} +SEEDS[("infer_transaction_details", "member_in_sender")] = { + "tx": { + "sender": "Tomáš Fiktivný", + "message": "FUJ trenink", + "user_id": "", + "date": {"type": "string", "value": "2026-02-01"}, + }, + "member_names": _ROSTER, + "default_year": 2026, +} +SEEDS[("infer_transaction_details", "month_fallback_from_date")] = { + "tx": { + "sender": "Alžběta Testovická", + "message": "platba", + "user_id": "", + "date": {"type": "string", 
"value": "2026-03-15"}, + }, + "member_names": _ROSTER, + "default_year": 2026, +} +SEEDS[("infer_transaction_details", "serial_date")] = { + "tx": { + "sender": "Jana Nováková", + "message": "leden", + "user_id": "", + "date": {"type": "float", "value": 46027.0}, # 2026-01-15 in Sheets serial + }, + "member_names": _ROSTER, + "default_year": 2026, +} +SEEDS[("infer_transaction_details", "no_member_no_month")] = { + "tx": { + "sender": "Unknown Person", + "message": "random text", + "user_id": "", + "date": {"type": "none"}, + }, + "member_names": _ROSTER, + "default_year": 2026, +} + + +# --- format_date --- + +SEEDS[("format_date", "string_iso")] = {"val": {"type": "string", "value": "2026-01-15"}} +SEEDS[("format_date", "string_non_iso")] = {"val": {"type": "string", "value": "garbage"}} +SEEDS[("format_date", "empty_string")] = {"val": {"type": "string", "value": ""}} +SEEDS[("format_date", "none_value")] = {"val": {"type": "none"}} +SEEDS[("format_date", "serial_int")] = {"val": {"type": "int", "value": 46027}} +SEEDS[("format_date", "serial_float")] = {"val": {"type": "float", "value": 46027.5}} +SEEDS[("format_date", "serial_float_exact")] = {"val": {"type": "float", "value": 45957.0}} # 2025-10-01 + + +# --------------------------------------------------------------------------- +# Reconcile handcrafted seeds +# --------------------------------------------------------------------------- + +def _tx(date, amount, person, purpose, sender="Payer", message="", bank_id="", inferred_amount=None): + return { + "date": date, + "amount": amount, + "manual_fix": "", + "person": person, + "purpose": purpose, + "inferred_amount": inferred_amount if inferred_amount is not None else amount, + "sender": sender, + "message": message, + "bank_id": bank_id, + } + + +def _member(name, tier, fees: dict): + """fees: {month: (fee, count) or int}. 
Returns a dict so the scrubber + can find the 'name' key and apply deterministic pseudonymisation.""" + return {"name": name, "tier": tier, "fees": fees} + + +def _reconcile_seed(members, sorted_months, transactions, exceptions=None, default_year=2026): + return { + "members": members, + "sorted_months": sorted_months, + "transactions": transactions, + "exceptions": exceptions or [], + "default_year": default_year, + } + + +# 01 — greedy exact: Alice pays exactly 750, expected 750 +SEEDS[("reconcile", "01_greedy_exact")] = _reconcile_seed( + members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})], + sorted_months=["2026-01"], + transactions=[_tx("2026-01-20", 750, "Alice Dvořák", "2026-01", sender="Alice Dvořák")], +) + +# 02 — greedy overpayment → credit: Alice pays 900, expected 750 +SEEDS[("reconcile", "02_greedy_overpayment")] = _reconcile_seed( + members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})], + sorted_months=["2026-01"], + transactions=[_tx("2026-01-20", 900, "Alice Dvořák", "2026-01", sender="Alice Dvořák")], +) + +# 03 — proportional: Alice pays 800 for 3 months (750+750+350=1850 expected) +SEEDS[("reconcile", "03_proportional_remainder")] = _reconcile_seed( + members=[_member("Alice Dvořák", "A", { + "2026-01": (750, 3), + "2026-02": (750, 2), + "2026-03": (350, 2), + })], + sorted_months=["2026-01", "2026-02", "2026-03"], + transactions=[_tx("2026-03-10", 800, "Alice Dvořák", "2026-01,2026-02,2026-03", sender="Alice Dvořák")], +) + +# 04 — even-split: all expected=0, payment spread evenly +SEEDS[("reconcile", "04_even_split_prepayment")] = _reconcile_seed( + members=[_member("Bob Kratochvíl", "A", { + "2026-04": (0, 0), + "2026-05": (0, 0), + })], + sorted_months=["2026-04", "2026-05"], + transactions=[_tx("2026-03-25", 700, "Bob Kratochvíl", "2026-04,2026-05", sender="Bob Kratochvíl")], +) + +# 05 — out-of-window: payment references 2025-08 which is outside sorted_months +SEEDS[("reconcile", "05_out_of_window_credit")] = 
_reconcile_seed( + members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})], + sorted_months=["2026-01"], + transactions=[_tx("2026-01-20", 1500, "Alice Dvořák", "2026-01,2025-08", sender="Alice Dvořák")], +) + +# 06 — exception override: Alice's 2026-01 fee overridden from 750 to 300 +SEEDS[("reconcile", "06_exception_override")] = _reconcile_seed( + members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})], + sorted_months=["2026-01"], + transactions=[_tx("2026-01-20", 300, "Alice Dvořák", "2026-01", sender="Alice Dvořák")], + # exceptions as list of [name, period, amount, note] (capture_fixtures converts to dict) + exceptions=[{"name": "Alice Dvořák", "period": "2026-01", "amount": 300, "note": "injury discount"}], +) + +# 07 — other purpose: tournament fee split between Alice and Bob +SEEDS[("reconcile", "07_other_purpose_split")] = _reconcile_seed( + members=[ + _member("Alice Dvořák", "A", {"2026-01": (750, 3)}), + _member("Bob Kratochvíl", "A", {"2026-01": (750, 2)}), + ], + sorted_months=["2026-01"], + transactions=[_tx("2026-01-10", 800, "Alice Dvořák, Bob Kratochvíl", "other:tournament", sender="Alice Dvořák")], +) + +# 08 — junior with attendance=1 (expected=0 in reconcile, unknown in UI) +SEEDS[("reconcile", "08_junior_question_mark")] = _reconcile_seed( + members=[_member("Karel Junior", "A", {"2026-01": (0, 1)})], + sorted_months=["2026-01"], + transactions=[_tx("2026-01-20", 200, "Karel Junior", "2026-01", sender="Karel Junior")], +) + +# 09 — multi-person comma-split: Alice and Bob share a payment for 2 months +SEEDS[("reconcile", "09_multiperson_multimonth")] = _reconcile_seed( + members=[ + _member("Alice Dvořák", "A", {"2026-01": (750, 3), "2026-02": (750, 2)}), + _member("Bob Kratochvíl", "A", {"2026-01": (750, 2), "2026-02": (350, 2)}), + ], + sorted_months=["2026-01", "2026-02"], + transactions=[_tx("2026-02-15", 2000, "Alice Dvořák, Bob Kratochvíl", "2026-01,2026-02", sender="Alice Dvořák")], +) + +# 10 — unmatched: no person, 
garbage message +SEEDS[("reconcile", "10_unmatched")] = _reconcile_seed( + members=[_member("Alice Dvořák", "A", {"2026-01": (750, 3)})], + sorted_months=["2026-01"], + transactions=[_tx("2026-01-20", 500, "", "", sender="Unknown Payer", message="garbage xyz 999")], +) + + +# --------------------------------------------------------------------------- +# Real-data seeds +# --------------------------------------------------------------------------- + +# Indices into tmp/payments_transactions_cache.json['data'] selected for coverage. +# DO NOT change these — they are deliberately frozen to make re-runs deterministic. +_REAL_PMR_INDICES = [ + (16, "real_single_leden"), + (17, "real_range_prosinec_leden"), + (18, "real_list_prosinec_leden_unor"), + (22, "real_martin_prosinec_leden"), + (23, "real_range_listopad_leden"), + (25, "real_filip_prosinec_leden_unor"), + (36, "real_mixed_czech_numeric"), + (42, "real_dominika_numeric_multi"), + # index 67 removed: the name-sweep scrubber changes the text prefix in a way + # that breaks the numeric-slash parser (empty result vs expected "2026-03"). + (72, "real_tomik_numeric_plus"), + (73, "real_franc_numeric_space"), + (74, "real_jana_numeric_multi"), + (80, "real_alex_numeric_long"), + (89, "real_emily_numeric_long"), + (90, "real_jachym_numeric_multi"), +] + +# Real match_members seeds are intentionally omitted: after PII scrubbing +# the member_names pseudonyms are inconsistent with the (un-scrubbed) text, +# causing all Go parity assertions to fail. The synthetic seeds below cover +# the same code paths without any real data. 
_REAL_MM_INDICES: list = []


def real_parse_month_references_seeds(default_year: int = 2026):
    """Yield ``(case_id, seed)`` pairs built from cached real messages.

    Reads ``payments_transactions_cache.json`` through ``_load_cache`` and
    picks the rows listed in ``_REAL_PMR_INDICES``. Yields nothing when the
    cache is missing; an index past the end of the cached data is skipped.
    """
    cache = _load_cache("payments_transactions_cache.json")
    if cache is None:
        return
    rows = cache.get("data", [])
    for position, case_id in _REAL_PMR_INDICES:
        if position < len(rows):
            message = str(rows[position].get("message", ""))
            yield case_id, {"text": message, "default_year": default_year}


def _real_member_names():
    """Return the first cell of every roster row in the attendance cache.

    Returns an empty list when the cache file is missing. If the first
    element of ``data`` is itself a list, that element is used as the row
    list. Only list/tuple rows with at least two cells contribute a name.
    """
    cache = _load_cache("attendance_regular_cache.json")
    if cache is None:
        return []
    rows = cache.get("data", [])
    if rows and isinstance(rows[0], list):
        rows = rows[0]
    names = []
    for row in rows:
        if isinstance(row, (list, tuple)) and len(row) >= 2:
            names.append(row[0])
    return names


def real_match_members_seeds():
    """Yield ``(case_id, seed)`` pairs pairing real sender/message text with the real roster.

    Driven by ``_REAL_MM_INDICES``; yields nothing when either cache is
    unavailable or the roster is empty. The seed text is
    ``"<sender> <message>"``.
    """
    cache = _load_cache("payments_transactions_cache.json")
    member_names = _real_member_names()
    if cache is None or not member_names:
        return
    rows = cache.get("data", [])
    for position, case_id in _REAL_MM_INDICES:
        if position < len(rows):
            record = rows[position]
            sender = str(record.get("sender", ""))
            message = str(record.get("message", ""))
            yield case_id, {"text": f"{sender} {message}", "member_names": member_names}
# run_func FUNC
#   Capture every seed of one pure function and write one scrubbed fixture
#   per case to "$FIXTURES/pure/FUNC/<case>.json" (existing files are
#   overwritten).
#   Reads globals: FIXTURES, REPO, CAPTURE_CMD.
run_func() {
    local func="$1"
    local dir="$FIXTURES/pure/$func"
    local line case_id
    mkdir -p "$dir"
    echo "  Capturing $func..."
    eval "$CAPTURE_CMD --func $func --all" | while IFS= read -r line; do
        # Pass the fixture to Python on stdin only. The previous first attempt
        # pasted "$line" into the Python source inside ''' quotes, so a quote
        # or backslash in a (real, bank-supplied) message either corrupted the
        # literal or ran as code, and 2>/dev/null hid the failure.
        case_id="$(printf '%s\n' "$line" | python3 -c 'import sys, json; print(json.load(sys.stdin)["case"])')"
        if [[ -z "$case_id" ]]; then
            # Without this guard an empty id would silently write "$dir/.json".
            echo "run_func: $func emitted a fixture with an empty case id" >&2
            exit 1
        fi
        printf '%s\n' "$line" | python3 "$REPO/scripts/scrub_fixtures.py" > "$dir/${case_id}.json"
    done
}
+ +Usage: + # Single case: + python capture_fixtures.py --func normalize --case simple_ascii \\ + --input-seed simple_ascii | python scrub_fixtures.py \\ + > go/tests/fixtures/pure/normalize/simple_ascii.json + + # All seeds for a function (newline-delimited JSON, one object per line): + python capture_fixtures.py --func normalize --all + + # Feed input from stdin (for ad-hoc cases): + echo '{"text":"hello"}' | python capture_fixtures.py --func normalize \\ + --case adhoc --input-stdin + +See scripts/_fixture_seeds.py for the seed registry. +""" + +from __future__ import annotations + +import argparse +import json +import sys +import os +import datetime + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from czech_utils import normalize, parse_month_references +from attendance import calculate_fee, calculate_junior_fee +from infer_payments import parse_czk_amount +from sync_fio_to_sheets import generate_sync_id as _py_generate_sync_id +from match_payments import ( + _build_name_variants, + match_members, + infer_transaction_details, + format_date, + reconcile, +) +from czech_utils import normalize as _norm + +import _fixture_seeds as seeds + + +# --------------------------------------------------------------------------- +# Type-envelope helpers +# --------------------------------------------------------------------------- + +def _decode_envelope(envelope): + """Convert a {type, value} envelope to a Python value for function calls.""" + if not isinstance(envelope, dict): + return envelope + t = envelope.get("type", "raw") + v = envelope.get("value") + if t == "none": + return None + if t == "int": + return int(v) + if t == "float": + return float(v) + if t == "string": + return v + return v # raw JSON value (for fields that don't use an envelope) + + +# --------------------------------------------------------------------------- +# Per-function capture implementations +# --------------------------------------------------------------------------- + 
def capture_normalize(inp: dict) -> dict:
    """Run ``normalize`` on ``inp["text"]`` and return ``{"text": result}``."""
    return {"text": normalize(inp["text"])}


def capture_parse_month_references(inp: dict) -> dict:
    """Run ``parse_month_references`` and return ``{"months": result}``.

    ``default_year`` is optional in the seed and defaults to 2026.
    """
    text = inp["text"]
    year = inp.get("default_year", 2026)
    return {"months": parse_month_references(text, year)}


def capture_calculate_fee(inp: dict) -> dict:
    """Run ``calculate_fee`` and return ``{"fee": result}``."""
    fee = calculate_fee(inp["attendance_count"], inp["month_key"])
    return dict(fee=fee)


def capture_calculate_junior_fee(inp: dict) -> dict:
    """Run ``calculate_junior_fee`` and split its result into value + flag.

    The Python function may return the string ``"?"``; that is encoded as
    ``{"value": 0, "unknown": True}``. Any other result is coerced with
    ``int`` and reported with ``"unknown": False``.
    """
    raw = calculate_junior_fee(inp["attendance_count"], inp["month_key"])
    if raw != "?":
        return {"value": int(raw), "unknown": False}
    return {"value": 0, "unknown": True}


def capture_parse_czk_amount(inp: dict) -> dict:
    """Decode the ``val`` envelope, run ``parse_czk_amount``, return ``{"amount": float}``."""
    decoded = _decode_envelope(inp["val"])
    return {"amount": float(parse_czk_amount(decoded))}
def capture_build_name_variants(inp: dict) -> dict:
    """Run ``_build_name_variants`` on ``inp["full_name"]``; return ``{"variants": result}``."""
    variants = _build_name_variants(inp["full_name"])
    return {"variants": variants}


def capture_match_members(inp: dict) -> dict:
    """Run ``match_members`` and convert its ``(name, confidence)`` pairs to dicts."""
    pairs = match_members(inp["text"], inp["member_names"])
    rows = []
    for member, confidence in pairs:
        rows.append({"name": member, "confidence": confidence})
    return {"matches": rows}


def capture_infer_transaction_details(inp: dict) -> dict:
    """Run ``infer_transaction_details`` on a copy of the seed transaction.

    The ``date`` envelope is decoded on the copy, so the seed itself (which
    is echoed back as the fixture input) is left untouched. ``search_text``
    falls back to the result's ``matched_text`` and then to ``""``.

    NOTE(review): the seeds carry a ``default_year`` key, but it is not
    forwarded to ``infer_transaction_details`` here -- confirm that function
    takes no year argument.
    """
    raw_tx = inp["tx"]
    tx = dict(raw_tx)
    tx["date"] = _decode_envelope(raw_tx.get("date"))
    inferred = infer_transaction_details(tx, inp["member_names"])
    fallback_text = inferred.get("matched_text", "")
    return {
        "matches": [
            {"name": member, "confidence": confidence}
            for member, confidence in inferred["members"]
        ],
        "months": inferred["months"],
        "search_text": inferred.get("search_text", fallback_text),
    }


def capture_format_date(inp: dict) -> dict:
    """Decode the ``val`` envelope, run ``format_date``, return ``{"date": result}``."""
    return {"date": format_date(_decode_envelope(inp["val"]))}


def _build_exceptions(exc_list):
    """Convert seed exceptions to the dict ``reconcile()`` is called with.

    Accepts both the legacy list row ``[name, period, amount, note]`` (note
    optional) and the dict row ``{"name", "period", "amount", "note"}``.

    Returns:
        ``{(_norm(name), _norm(period)): {"amount": int, "note": str}}``;
        an empty dict for empty/``None`` input.
    """
    table = {}
    for entry in exc_list or ():
        if isinstance(entry, dict):
            fields = (
                entry.get("name", ""),
                entry.get("period", ""),
                entry.get("amount", 0),
                entry.get("note", ""),
            )
        else:
            fields = (entry[0], entry[1], entry[2], entry[3] if len(entry) > 3 else "")
        who, period, amount, note = fields
        table[(_norm(who), _norm(period))] = {"amount": int(amount), "note": note}
    return table
+ result = {} + for month, v in fees_raw.items(): + if isinstance(v, (list, tuple)) and len(v) == 2: + result[month] = (int(v[0]), int(v[1])) + else: + result[month] = int(v) + return result + + +def _tx_entry_out(tx): + """Convert a reconcile output TxEntry dict to a serializable form.""" + return { + "amount": float(tx.get("amount", 0)), + "date": tx.get("date", ""), + "sender": tx.get("sender", ""), + "message": tx.get("message", ""), + "confidence": tx.get("confidence", ""), + } + + +def _other_entry_out(e): + return { + "amount": float(e.get("amount", 0)), + "date": e.get("date", ""), + "sender": e.get("sender", ""), + "message": e.get("message", ""), + "purpose": e.get("purpose", ""), + "confidence": e.get("confidence", ""), + } + + +def _month_data_out(md): + return { + "expected": int(md["expected"]) if isinstance(md["expected"], (int, float)) else 0, + "original_expected": int(md["original_expected"]) if isinstance(md.get("original_expected"), (int, float)) else 0, + "attendance_count": int(md.get("attendance_count", 0)), + "exception": md.get("exception"), + "paid": float(md["paid"]), + "transactions": [_tx_entry_out(t) for t in md.get("transactions", [])], + } + + +def _unmatched_tx_out(tx): + return { + "date": tx.get("date", ""), + "amount": float(tx.get("amount", 0)), + "person": tx.get("person", ""), + "purpose": tx.get("purpose", ""), + "sender": tx.get("sender", ""), + "message": tx.get("message", ""), + "bank_id": tx.get("bank_id", ""), + } + + +def capture_reconcile(inp: dict) -> dict: + # Convert members from seed format to reconcile() format. + # Accepts both the new dict format {"name":..., "tier":..., "fees":{...}} + # and the legacy tuple format [name, tier, fees_dict]. 
def capture_reconcile(inp: dict) -> dict:
    """Run ``reconcile`` on a seed and return a JSON-serializable result.

    Members are accepted both as dicts ``{"name", "tier", "fees"}`` and as
    legacy sequences ``[name, tier, fees]``; each is handed to ``reconcile``
    as a ``(name, tier, fees)`` tuple with fees converted by
    ``_member_fee_dict``. Balances and credits are coerced to ``int``.

    NOTE(review): the seed's ``default_year`` is not passed to ``reconcile``
    here -- confirm that is intended.
    """
    roster = []
    for entry in inp["members"]:
        if isinstance(entry, dict):
            name, tier, fees_raw = entry["name"], entry["tier"], entry.get("fees", {})
        else:
            name, tier, fees_raw = entry[0], entry[1], entry[2]
        roster.append((name, tier, _member_fee_dict(fees_raw)))

    exceptions = _build_exceptions(inp.get("exceptions") or [])
    result = reconcile(roster, inp["sorted_months"], inp["transactions"], exceptions)

    def _member_out(record):
        # One member's slice of the result, in the fixture's key order.
        return {
            "tier": record["tier"],
            "months": {
                month: _month_data_out(cell) for month, cell in record["months"].items()
            },
            "other_transactions": [
                _other_entry_out(item) for item in record.get("other_transactions", [])
            ],
            "total_balance": int(record["total_balance"]),
        }

    return {
        "members": {name: _member_out(record) for name, record in result["members"].items()},
        "unmatched": [_unmatched_tx_out(tx) for tx in result["unmatched"]],
        "credits": {name: int(amount) for name, amount in result["credits"].items()},
    }
def _emit(func_name: str, case_id: str, inp: dict) -> None:
    """Run one capture and print the fixture document as a single JSON line.

    The document has the keys ``case``, ``func``, ``captured_at``, ``input``
    and ``output``. Non-ASCII characters are written as-is
    (``ensure_ascii=False``).

    Note: ``captured_at`` is today's date, so regenerating an otherwise
    unchanged fixture on a later day still changes its bytes.
    """
    dispatch = _DISPATCHERS[func_name]
    output = dispatch(inp)
    doc = {
        "case": case_id,
        "func": _FUNC_MODULE[func_name],
        "captured_at": datetime.date.today().isoformat(),
        "input": inp,
        "output": output,
    }
    print(json.dumps(doc, ensure_ascii=False))


def _all_seeds(func_name: str):
    """Yield (case_id, seed) for all seeds of a function.

    Handcrafted seeds from ``seeds.SEEDS`` come first (in registry order),
    followed by real-data seeds for the two functions that have them.
    """
    for (fn, case_id), seed in seeds.SEEDS.items():
        if fn == func_name:
            yield case_id, seed

    # Real-data seeds
    if func_name == "parse_month_references":
        yield from seeds.real_parse_month_references_seeds()
    if func_name == "match_members":
        yield from seeds.real_match_members_seeds()


def main() -> None:
    """Command-line entry point.

    Modes:
        ``--all``                     emit every seed of ``--func``, one JSON
                                      object per line.
        ``--case ID --input-seed S``  emit the registry seed ``S`` as case ``ID``.
        ``--case ID --input-stdin``   emit the JSON object read from stdin.

    Conflicting combinations are rejected with a usage error (exit status 2)
    instead of being silently ignored: ``--input-seed`` together with
    ``--input-stdin``, and either of them together with ``--all``.
    """
    parser = argparse.ArgumentParser(
        description="Capture pure-function outputs as JSON fixtures."
    )
    parser.add_argument(
        "--func", required=True, choices=list(_DISPATCHERS), help="Function to capture."
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--case", help="Case ID (file stem). Use with --input-seed or --input-stdin.")
    group.add_argument("--all", action="store_true", help="Emit all seeds for the function.")
    parser.add_argument(
        "--input-seed", metavar="SEED_ID",
        help="Seed key in _fixture_seeds.SEEDS (required unless --input-stdin or --all).",
    )
    parser.add_argument(
        "--input-stdin", action="store_true",
        help="Read input JSON from stdin instead of seed registry.",
    )
    args = parser.parse_args()

    # Previously stdin silently won when both sources were given.
    if args.input_seed and args.input_stdin:
        parser.error("--input-seed and --input-stdin are mutually exclusive.")

    if args.all:
        # Previously an input source passed with --all was silently dropped.
        if args.input_seed or args.input_stdin:
            parser.error("--all reads the seed registry; do not combine it with --input-seed/--input-stdin.")
        for case_id, seed in _all_seeds(args.func):
            _emit(args.func, case_id, seed)
        return

    # Single case
    if args.input_stdin:
        inp = json.load(sys.stdin)
    elif args.input_seed:
        key = (args.func, args.input_seed)
        if key not in seeds.SEEDS:
            sys.exit(f"Seed ({args.func!r}, {args.input_seed!r}) not found in _fixture_seeds.SEEDS")
        inp = seeds.SEEDS[key]
    else:
        parser.error("Provide --input-seed SEED_ID or --input-stdin.")

    _emit(args.func, args.case, inp)
+ - All other fields (dates, amounts, months, fees): preserved verbatim. + +Function-specific exceptions: + - match_members / infer_transaction_details: these functions are tested with + synthetic member names only. Only real-roster message sweeping is applied; + field-key scrubbing is skipped so Go can perform genuine name matching. + - generate_sync_id: after normal field-key scrubbing the output sync_id is + recomputed from the now-scrubbed inputs so the hash remains consistent. +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import re +import sys +from typing import Any + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +_REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +# --------------------------------------------------------------------------- +# Bijection helpers +# --------------------------------------------------------------------------- + +def _sha256_hex(s: str) -> str: + return hashlib.sha256(s.encode("utf-8")).hexdigest() + + +def scrub_name(name: str) -> str: + """Deterministic pseudonym for a member name.""" + if not name: + return name + return f"Member_{_sha256_hex(name)[:8]}" + + +def scrub_id_digits(s: str) -> str: + """Length-preserving digit hash for VS, bank_id, user_id, etc.""" + s = str(s) + if not s: + return s + if re.match(r"^\d+$", s): + n = len(s) + hashed = int(_sha256_hex(s), 16) % (10 ** n) + return f"{hashed:0{n}d}" + return f"id_{_sha256_hex(s)[:8]}" + + +def scrub_account(s: str) -> str: + """Preserve Czech bank account format PREFIX/BANKCODE.""" + s = str(s) + if not s: + return s + m = re.match(r"^(\d+)/(\d{4})$", s) + if m: + prefix, bank = m.group(1), m.group(2) + n = len(prefix) + new_prefix = int(_sha256_hex(prefix), 16) % (10 ** n) + new_bank = int(_sha256_hex(bank), 16) % 10000 + return f"{new_prefix:0{n}d}/{new_bank:04d}" + return scrub_id_digits(s) + + +# 
--------------------------------------------------------------------------- +# Name roster for message sweeps +# --------------------------------------------------------------------------- + +def _load_member_names() -> list[str]: + """Load canonical names from the attendance cache (may not exist).""" + path = os.path.join(_REPO, "tmp", "attendance_regular_cache.json") + if not os.path.exists(path): + return [] + try: + with open(path, encoding="utf-8") as f: + cache = json.load(f) + rows = cache.get("data", []) + if rows and isinstance(rows[0], list): + rows = rows[0] + names = [] + for row in rows: + if isinstance(row, (list, tuple)) and len(row) >= 1: + names.append(str(row[0])) + return names + except Exception: + return [] + + +def _build_name_map(names: list[str]) -> dict[str, str]: + """Map each real name (and its normalized form) to its pseudonym.""" + mapping: dict[str, str] = {} + for name in names: + pseudo = scrub_name(name) + mapping[name] = pseudo + # Also add first+last without parenthetical nicknames + base = re.sub(r"\s*\([^)]*\)\s*", " ", name).strip() + if base != name: + mapping[base] = pseudo + return mapping + + +def _sweep_names_in_text(text: str, name_map: dict[str, str]) -> str: + """Replace real-name substrings in free text, longest match first.""" + # Sort descending by length so longer names replace before their substrings + for real in sorted(name_map, key=len, reverse=True): + if real and real in text: + text = text.replace(real, name_map[real]) + return text + + +# --------------------------------------------------------------------------- +# Scramble whitelist — only these keys are scrambled; everything else is kept +# --------------------------------------------------------------------------- + +_SCRAMBLE_KEYS = { + "name", + "member_names", + "person", + "sender", + "sender_account", + "account", + "vs", + "bank_id", + "user_id", + "note", +} + +# Dict keys whose *child keys* (not values) are member names and need scrubbing. 
+# e.g. the reconcile output: {"members": {"Alice Dvořák": {...}}, "credits": {"Alice Dvořák": 0}} +_MEMBER_KEY_DICTS = {"members", "credits"} + +_MESSAGE_KEYS = {"message", "text", "search_text"} + + +def _scrub_value(key: str, value: Any, name_map: dict[str, str]) -> Any: + """Scrub a single value based on its field key.""" + if isinstance(value, list): + if key == "member_names": + return [scrub_name(str(v)) for v in value] + # Don't propagate parent key into list elements — each element is an + # independent document. Propagating would incorrectly flag nested dicts + # (e.g. the fees dict inside a member tuple) as member-name-keyed dicts. + return [_scrub_doc(v, name_map) for v in value] + if isinstance(value, dict): + # Pass the current key as parent context so dicts like + # {"members": {"Real Name": ...}} get their keys scrubbed too. + return _scrub_doc(value, name_map, _parent_key=key) + if key not in _SCRAMBLE_KEYS and key not in _MESSAGE_KEYS: + return value + if not isinstance(value, str): + value = str(value) + if key in _MESSAGE_KEYS: + return _sweep_names_in_text(value, name_map) + if key == "name": + return scrub_name(value) + if key in ("sender_account", "account"): + return scrub_account(value) + if key == "note": + return "" + if key == "person": + # "person" may contain comma-separated member names (e.g. "Alice, Bob"). + # Sweep with name_map so each name gets its own consistent pseudonym, + # matching what the output.members keys will look like. + return _sweep_names_in_text(value, name_map) if value else value + # vs, bank_id, user_id, sender + return scrub_id_digits(value) if re.match(r"^\d+$", value) else scrub_name(value) if value else value + + +def _scrub_doc(doc: Any, name_map: dict[str, str], _parent_key: str = "") -> Any: + """Recursively scrub a JSON document.""" + if isinstance(doc, dict): + if _parent_key in _MEMBER_KEY_DICTS: + # Keys of this dict are member names — scrub the keys and recurse. 
+ return { + scrub_name(k): _scrub_doc(v, name_map) + for k, v in doc.items() + } + return {k: _scrub_value(k, v, name_map) for k, v in doc.items()} + if isinstance(doc, list): + return [_scrub_doc(item, name_map) for item in doc] + return doc + + +# Functions where field-key scrubbing would break parity (name matching tests). +# Only real-roster message sweep is applied for these. +_NO_FIELD_SCRUB_FUNCS = { + "scripts.match_payments.match_members", + "scripts.match_payments.infer_transaction_details", +} + + +def _scrub_messages_only(doc: Any, name_map: dict[str, str]) -> Any: + """Sweep only message/text/search_text fields; leave all other values unchanged.""" + if isinstance(doc, dict): + return { + k: (_sweep_names_in_text(v, name_map) if k in _MESSAGE_KEYS and isinstance(v, str) + else _scrub_messages_only(v, name_map)) + for k, v in doc.items() + } + if isinstance(doc, list): + return [_scrub_messages_only(item, name_map) for item in doc] + return doc + + +def _recompute_sync_id(tx_scrubbed: dict) -> str: + """Recompute generate_sync_id hash from already-scrubbed tx fields. + + After the scrubber changes sender/vs/bank_id the original hash is invalid. + Replicates the Python generate_sync_id formula (pipe-separated, lowercased) + and always treats amount as float64 to match Go's formatAmount behaviour. 
+ """ + envelope = tx_scrubbed.get("amount", {}) + if isinstance(envelope, dict): + t = envelope.get("type", "") + v = envelope.get("value") + if t in ("int", "float"): + amount = float(v) # always float — matches Go's formatAmount + else: + amount = "" + else: + amount = float(envelope) if envelope not in (None, "") else "" + + currency = tx_scrubbed.get("currency", "") or "CZK" + components = [ + str(tx_scrubbed.get("date", "")), + str(amount), + currency, + str(tx_scrubbed.get("sender", "")), + str(tx_scrubbed.get("vs", "")), + str(tx_scrubbed.get("message", "")), + str(tx_scrubbed.get("bank_id", "")), + ] + raw_str = "|".join(components).lower() + return hashlib.sha256(raw_str.encode("utf-8")).hexdigest() + + +def _extract_inline_names(doc: Any) -> list[str]: + """Extract names from member_names and 'name' fields in the fixture itself.""" + names: list[str] = [] + if isinstance(doc, dict): + for k, v in doc.items(): + if k == "member_names" and isinstance(v, list): + names.extend(str(n) for n in v) + elif k == "name" and isinstance(v, str): + names.append(v) + else: + names.extend(_extract_inline_names(v)) + elif isinstance(doc, list): + for item in doc: + names.extend(_extract_inline_names(item)) + return names + + +def scrub_fixture(doc: dict) -> dict: + """Scrub a single fixture document in-place (returns new dict).""" + roster_names = _load_member_names() + inline_names = _extract_inline_names(doc) + all_names = list(dict.fromkeys(roster_names + inline_names)) + name_map = _build_name_map(all_names) + + func = doc.get("func", "") + + # match_members / infer_transaction_details: tested with synthetic names only. + # Field-key scrubbing would make member_names pseudonyms inconsistent with + # the text, breaking Go's name-matching assertions. Only sweep messages. + if func in _NO_FIELD_SCRUB_FUNCS: + # Synthetic member names only — no field scrubbing, no message sweep. 
+ # Any sweep would create inconsistency between scrubbed output fields + # (search_text) and un-scrubbed input fields (sender, member_names). + return _scrub_messages_only(doc, {}) + + result = _scrub_doc(doc, name_map) + + # generate_sync_id: recompute hash from the now-scrubbed inputs so the + # fixture is self-consistent (scrubbed fields → Go hashes scrubbed values). + if func.endswith("generate_sync_id"): + result["output"]["sync_id"] = _recompute_sync_id(result["input"].get("tx", {})) + + return result + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def main() -> None: + parser = argparse.ArgumentParser(description="Scrub PII from fixture JSON.") + parser.add_argument( + "--multi", action="store_true", + help="Process newline-delimited JSON (one object per line) from stdin.", + ) + args = parser.parse_args() + + if args.multi: + for line in sys.stdin: + line = line.strip() + if not line: + continue + doc = json.loads(line) + print(json.dumps(scrub_fixture(doc), ensure_ascii=False)) + else: + doc = json.load(sys.stdin) + out = scrub_fixture(doc) + print(json.dumps(out, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main()