From c5a8a4e7b193e88252a45cb9980dbcf61c2e95cf Mon Sep 17 00:00:00 2001 From: Jan Novak Date: Wed, 6 May 2026 16:38:21 +0200 Subject: [PATCH] fix: include juniors in payment-inference roster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit infer_payments was building member_names from get_members_with_fees() (adults sheet only). Junior-only members were invisible to the matcher, so a payment message containing an exact junior name would produce a fuzzy review match against a different adult sharing the same first name. Fix: union the adult and junior rosters (deduped via canonical_member_key) so all members are candidates. The existing exact-name short-circuit in match_members then handles precedence correctly. Two regression tests added for the Jáchym Kubík / Jáchym Hrušák case. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 6 + ...05-06-1626-infer-payments-junior-roster.md | 129 ++++++++++++++++++ scripts/infer_payments.py | 19 ++- tests/test_match_members.py | 19 +++ 4 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 docs/plans/2026-05-06-1626-infer-payments-junior-roster.md diff --git a/CHANGELOG.md b/CHANGELOG.md index e7009b9..8d05004 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 2026-05-06 16:38 CEST — fix: include juniors in payment-inference roster + +- `scripts/infer_payments.py`: union adults + junior rosters so junior-only members are visible to the matcher. +- Root cause: `get_members_with_fees()` reads only the adults sheet; junior-only kids like Jáchym Kubík were absent from `member_names`, causing the exact-match short-circuit to never fire and a different adult sharing the first name to win via fuzzy review. +- Two regression tests added to `tests/test_match_members.py`. + ## 2026-05-06 13:18 CEST — feat(go/M2.7-2.9): port domain/matching package - New `go/internal/domain/matching` package porting three helpers from `scripts/match_payments.py`. diff --git a/docs/plans/2026-05-06-1626-infer-payments-junior-roster.md b/docs/plans/2026-05-06-1626-infer-payments-junior-roster.md new file mode 100644 index 0000000..be158a1 --- /dev/null +++ b/docs/plans/2026-05-06-1626-infer-payments-junior-roster.md @@ -0,0 +1,129 @@ +# Include junior members in payment inference roster + +## Context + +A bank payment from sender `JIŘÍ KUBÍK` with the message +`Jáchym Kubík: 01/2026+03/2026+04/2026` is being inferred as +`[?] Jáchym Hrušák (G)` instead of the obvious `Jáchym Kubík`, even though +the message contains his exact full name. + +**Root cause** (confirmed with the user): `Jáchym Kubík` is in the **junior** +attendance sheet only — he does not appear on the main/adults sheet. But +[scripts/infer_payments.py:101-102](scripts/infer_payments.py#L101-L102) +builds `member_names` by calling `get_members_with_fees()` +([scripts/attendance.py:170](scripts/attendance.py#L170)), which reads only +`EXPORT_URL` (the adults sheet). Junior-only members are therefore invisible +to the matcher. + +With Kubík absent from `member_names`, the matcher in +[scripts/match_payments.py:65](scripts/match_payments.py#L65) processes the +combined text `jiri kubik jachym kubik: 01/2026+03/2026+04/2026` against an +adults-only roster: + +- The exact-full-name short-circuit (`match_payments.py:75-84`) finds nothing — + no adult's full name is in the text. +- Hrušák `(G)` is the only adult with first name `Jáchym`. He fails the + auto-rules (his surname isn't in the text) but hits the partial-first-name + review rule (`match_payments.py:123-125`) → returned as `("Jáchym Hrušák (G)", + "review")`, rendered as `[?] Jáchym Hrušák (G)`. + +The user's original framing — "exact match in message should win over +everything" — is already implemented for any candidate that **is** in the +roster (the May-04 short-circuit). The bug is upstream: the right candidate +was never even considered. + +**Goal:** make `infer_payments` consider junior members as candidates, so +junior-only names like `Jáchym Kubík` get matched correctly. + +## Approach + +Single-file change in [scripts/infer_payments.py](scripts/infer_payments.py). + +Replace the adults-only roster lookup with a union of the adult and junior +rosters. `attendance.py` already exposes both: +[`get_members_with_fees()`](scripts/attendance.py#L170) for adults (and tier-J +juniors who train with adults) and +[`get_junior_members_with_fees()`](scripts/attendance.py#L208) for everyone in +the junior sheet. + +### Edit at [scripts/infer_payments.py:15](scripts/infer_payments.py#L15) + +```python +from attendance import get_members_with_fees, get_junior_members_with_fees +``` + +### Edit at [scripts/infer_payments.py:99-102](scripts/infer_payments.py#L99-L102) + +```python +print("Fetching member list for matching...") +adult_members, _ = get_members_with_fees() +junior_members, _ = get_junior_members_with_fees() + +# Union rosters, preserving first-seen order, deduping by canonical key +seen: set[str] = set() +member_names: list[str] = [] +for m in adult_members + junior_members: + name = m[0] + key = canonical_member_key(name) + if key in seen: + continue + seen.add(key) + member_names.append(name) +``` + +`canonical_member_key` already lives in +[scripts/match_payments.py:20](scripts/match_payments.py#L20) — import it +alongside `infer_transaction_details`. It normalizes diacritics/case/whitespace, +so `"Maria Maco"` and `"Mária Maco"` collapse to the same key. + +### Why downstream reconciliation still works + +`reconcile()` is invoked twice per page — once with the adults roster +([app.py:200](app.py#L200)) and once with the juniors roster +([app.py:384](app.py#L384)). Each call resolves the `Person` cell against its +own roster; a junior name resolves cleanly in the juniors call and lands in +"unmatched" in the adults call. That's already the existing behavior for any +junior payment manually entered into the `Person` column, so no further +changes are needed. + +### Files to modify + +- [scripts/infer_payments.py](scripts/infer_payments.py) — only the + import + roster construction. ~10-line change. + +### Files to read for confidence (no edits) + +- [scripts/attendance.py:208-289](scripts/attendance.py#L208-L289) — + `get_junior_members_with_fees` returns `(name, tier, …)` tuples just like + the adults version, so `m[0]` works for both. +- [scripts/match_payments.py:65-137](scripts/match_payments.py#L65-L137) — + `match_members` already handles the precedence the user wants (exact full-name + short-circuit), so once Kubík is in `member_names`, the case will be auto-matched + with no `[?]`. + +## Verification + +1. **Manual sanity** — re-run inference on the offending row: + - Clear `Person`/`Purpose` for the Kubík row in the payments sheet. + - `make infer`. + - Expect `Person = Jáchym Kubík`, `Purpose = 2026-01, 2026-03, 2026-04`, + no `[?]`. + +2. **Unit test** — extend + [tests/test_match_members.py](tests/test_match_members.py) (or add a small + `tests/test_infer_payments.py`) to assert that, given a roster that + includes `Jáchym Hrušák (G)` and `Jáchym Kubík`, the message + `Jáchym Kubík: 01/2026+03/2026+04/2026` resolves to + `[("Jáchym Kubík", "auto")]` only. This is really a regression test for + the May-04 short-circuit — the new behavior under test is just that + `infer_payments` now feeds in juniors. + +3. **Run the suite**: `make test`. + +4. **Dashboard smoke** — `make web`, open `/payments`, confirm the row now + shows the correct member; open `/juniors`, confirm the payment is + credited to Kubík for the three months listed. + +5. **Changelog** — once the user confirms the fix, append an entry to + [CHANGELOG.md](CHANGELOG.md) per [CLAUDE.md](CLAUDE.md): + `## YYYY-MM-DD HH:MM TZ — fix: include juniors in payment-inference roster`. diff --git a/scripts/infer_payments.py b/scripts/infer_payments.py index 0d65dd9..da333b1 100644 --- a/scripts/infer_payments.py +++ b/scripts/infer_payments.py @@ -11,8 +11,8 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__))) from googleapiclient.discovery import build from sync_fio_to_sheets import get_sheets_service, DEFAULT_SPREADSHEET_ID -from match_payments import infer_transaction_details -from attendance import get_members_with_fees +from match_payments import infer_transaction_details, canonical_member_key +from attendance import get_members_with_fees, get_junior_members_with_fees def parse_czk_amount(val) -> float: """Parse Czech currency string or handle raw numeric value.""" @@ -96,10 +96,19 @@ def infer_payments(spreadsheet_id: str, credentials_path: str, dry_run: bool = F print(f"Current header: {header}") return - # 2. Fetch members for matching + # 2. Fetch members for matching — union adults + juniors so junior-only + # members (e.g. kids not on the adult sheet) are visible to the matcher. print("Fetching member list for matching...") - members_data, _ = get_members_with_fees() - member_names = [m[0] for m in members_data] + adult_members, _ = get_members_with_fees() + junior_members, _ = get_junior_members_with_fees() + + seen: set[str] = set() + member_names: list[str] = [] + for m in adult_members + junior_members: + key = canonical_member_key(m[0]) + if key not in seen: + seen.add(key) + member_names.append(m[0]) # 3. Process rows print("Inferring details for empty rows...") diff --git a/tests/test_match_members.py b/tests/test_match_members.py index f5b463c..6071d1a 100644 --- a/tests/test_match_members.py +++ b/tests/test_match_members.py @@ -48,6 +48,25 @@ class TestMatchMembersExact(unittest.TestCase): names = [r[0] for r in result] self.assertIn("Tomáš Němeček (Tov)", names) + def test_shared_first_name_junior_in_roster_wins_exact(self): + # Regression: two members share first name "Jáchym"; message has full name + # of the junior-only member → exact match must win, no [?] on the adult. + roster = ["Jáchym Hrušák (G)", "Jáchym Kubík"] + result = match_members( + "JIŘÍ KUBÍK Jáchym Kubík: 01/2026+03/2026+04/2026", roster + ) + self.assertEqual(result, [("Jáchym Kubík", "auto")]) + + def test_shared_first_name_without_junior_in_roster_falls_back(self): + # Without Kubík in the roster (old behaviour), Hrušák wins via first-name + # partial match — confirms the roster-expansion fix is the real solution. + roster = ["Jáchym Hrušák (G)"] + result = match_members( + "JIŘÍ KUBÍK Jáchym Kubík: 01/2026+03/2026+04/2026", roster + ) + names = [r[0] for r in result] + self.assertIn("Jáchym Hrušák (G)", names) + if __name__ == "__main__": unittest.main()