diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d49008..08057d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 2026-05-06 09:38 CEST — feat(go/M2.5): port domain/money.ParseCZK + +- New `go/internal/domain/money` package with `ParseCZK(string) (float64, error)` ported from `scripts/infer_payments.py` `parse_czk_amount`. +- Preserves the Czech-locale heuristic: comma → decimal sep; 2+ dots → thousand seps; single dot → decimal (so `"1.500"` → `1.5`). +- Returns `(0, ErrInvalidAmount)` on parse failure; callers wanting Python's silent-zero contract use `v, _ := ParseCZK(s)`. +- 15 table-driven tests plus a silent-zero contract test; all expected values verified against live Python on 2026-05-06. + ## 2026-05-06 09:24 CEST — feat(go/M2.3+M2.4): port domain/fees.CalculateFee and CalculateJuniorFee - New `go/internal/domain/fees` package with adult and junior fee calculators ported from `scripts/attendance.py`. diff --git a/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md b/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md index aaed324..e3116f1 100644 --- a/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md +++ b/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md @@ -48,7 +48,7 @@ Each task: port the function, write Go unit tests for fresh cases, hook into the - [x] **M2.2** `domain/czech.ParseMonthReferences` — port `parse_month_references` (45 month declensions, range wrap, year inference) — `0a8017f` - [x] **M2.3** `domain/fees.CalculateFee` — port [attendance.py](scripts/attendance.py) `calculate_fee` (constants table) — `0fc3b6d` - [x] **M2.4** `domain/fees.CalculateJuniorFee` — port `calculate_junior_fee` with `Expected{Value int; Unknown bool}` for the `"?"` sentinel — `0fc3b6d` -- [ ] **M2.5** `domain/money.ParseCZK` — port [infer_payments.py](scripts/infer_payments.py) `parse_czk_amount` (Czech locale: comma decimal, dot/space thousand separators) +- [x] **M2.5** `domain/money.ParseCZK` — port 
[infer_payments.py](scripts/infer_payments.py) `parse_czk_amount` (Czech locale: comma decimal, dot/space thousand separators) — `d24d205` - [ ] **M2.6** `domain/synch.GenerateSyncID` — port [sync_fio_to_sheets.py](scripts/sync_fio_to_sheets.py) `generate_sync_id` (SHA-256, byte-stable hash; verify float string format against real sheet rows) - [ ] **M2.7** `domain/matching.BuildNameVariants` + `MatchMembers` — port `_build_name_variants` and `match_members` from [match_payments.py](scripts/match_payments.py) (auto vs review confidence, common-surname filter) - [ ] **M2.8** `domain/matching.InferTransactionDetails` — port `infer_transaction_details` (composes name + month parsing) diff --git a/docs/plans/2026-05-06-0928-go-m2-5-money-parse-czk.md b/docs/plans/2026-05-06-0928-go-m2-5-money-parse-czk.md new file mode 100644 index 0000000..1db6ede --- /dev/null +++ b/docs/plans/2026-05-06-0928-go-m2-5-money-parse-czk.md @@ -0,0 +1,199 @@ +# M2.5 — Port `parse_czk_amount` to `domain/money.ParseCZK` + +> On execution, this plan should be moved to +> `docs/plans/2026-05-06-0928-go-m2-5-money-parse-czk.md` per project CLAUDE.md +> (`docs/plans/YYYY-MM-DD-HHMM-slug.md`). Plan mode forces it to live under +> `~/.claude/plans/` until then. + +## Context + +Continuing the Go backend rewrite tracked in +[2026-05-03-2349-go-backend-rewrite-progress.md](../../srv/personal/fuj-management/docs/plans/2026-05-03-2349-go-backend-rewrite-progress.md). +M2.1–M2.4 are landed. Next leaf-level pure function is +`parse_czk_amount` from [scripts/infer_payments.py:17-45](../../srv/personal/fuj-management/scripts/infer_payments.py#L17-L45), +the Czech-locale amount parser used at [scripts/infer_payments.py:124](../../srv/personal/fuj-management/scripts/infer_payments.py#L124) +when reading the `Inferred Amount` column out of the payments sheet.
+ +It's a small, isolated string→float helper, but its heuristic for +disambiguating `.` and `,` as decimal vs thousand separator is +non-obvious and needs to behave identically in Go to keep parity once +the Go infer pipeline lands in M4.8. + +## Python behaviour (the spec) + +```py +def parse_czk_amount(val) -> float: + if val is None or val == "": + return 0.0 + if isinstance(val, (int, float)): + return float(val) + + val = str(val) + val = val.replace("Kč", "").replace("CZK", "").strip() + if "," in val: + # 1.500,00 -> 1500.00 — comma is decimal sep + val = val.replace(".", "").replace(" ", "").replace(",", ".") + else: + if val.count(".") > 1: + # 1.500.000 -> 1500000 — multiple dots = thousand sep + val = val.replace(".", "").replace(" ", "") + else: + # "1 500.00" -> "1500.00", "1.500" stays "1.500" (= 1.5) + val = val.replace(" ", "") + try: + return float(val) + except ValueError: + return 0.0 +``` + +Key behavioural notes for the Go port: + +1. Empty / None → 0, no error. +2. `"1.500"` (single dot, no comma) is parsed as **1.5**, not 1500. + The heuristic intentionally treats a lone dot as decimal. +3. `"1.500,00"` → 1500.0 (comma wins, dots are thousand seps). +4. `"1.500.000"` → 1500000.0 (multiple dots → all thousand seps). +5. `"1 500"` / `"1 500.00"` / `"500 Kč"` → spaces stripped. +6. Garbage → 0, no error in Python. +7. Strips literal substrings `"Kč"` and `"CZK"` (case-sensitive in Python). + +## Approach + +Create new package `internal/domain/money` mirroring the layout of +`internal/domain/fees` (single-file module + test file alongside). + +### Signature + +```go +// Package money ports Czech-locale currency parsing from +// scripts/infer_payments.py. +package money + +// ParseCZK parses a Czech-locale amount string and returns the value +// in CZK as a float64. 
+// +// Mirrors scripts/infer_payments.py parse_czk_amount: +// - empty input → (0, nil) +// - "Kč"/"CZK" suffixes are stripped (case-sensitive, like Python) +// - if input contains ",", comma is the decimal separator and +// dots/spaces are thousand separators ("1.500,00" → 1500.0) +// - else if input contains 2+ dots, all dots are thousand seps +// ("1.500.000" → 1500000.0) +// - else single dot stays as the decimal point ("1.500" → 1.5, +// matching the Python heuristic) +// - on parse failure, returns (0, ErrInvalidAmount). Callers wanting +// Python-equivalent silent-zero behaviour can discard the error. +func ParseCZK(s string) (float64, error) +``` + +`ErrInvalidAmount` is a package-level sentinel: + +```go +var ErrInvalidAmount = errors.New("money: invalid CZK amount") +``` + +Why `(float64, error)` instead of mirroring Python's silent zero: + +- Go idiom prefers explicit errors. +- The single Python call site doesn't distinguish parse-fail from + empty-input (both → 0), so if we want byte-equal behaviour at the + Go infer site (M4.8), the caller can `v, _ := money.ParseCZK(s)` + and get exactly the Python result. +- Future callers (e.g. user-facing import flows) may want to surface + the error. + +This matches the precedent set in M2.4 where we used +`Expected{Unknown bool}` rather than copying the Python `"?"` sentinel +verbatim — Go-idiomatic surface, parity-preserving semantics. + +### Polymorphic input? + +Python's `parse_czk_amount` also accepts raw int/float (passed through +unchanged) because Google Sheets API can return numeric cells as +`float64` rather than strings. **Skip this in Go.** The Sheets IO +adapter is M4.2, and that's where the `[]any` → string normalisation +will live. Keeping `ParseCZK` string-only keeps the leaf function tiny. 
+ +### Tests + +`money_test.go` mirrors the existing `fees_test.go` table-driven style, +including the verification comment showing the Python command used to +confirm each expected value: + +```sh +PYTHONPATH=scripts:. python -c ' +from infer_payments import parse_czk_amount +for v in [None, "", "0", "500", "500 Kč", "500 CZK", + "1 500", "1500.00", "1 500.00", + "1.500,00", "1500,5", "1.500.000", + "1.500", "abc", " ", "100,5 Kč"]: + print(repr(v), "->", parse_czk_amount(v)) +' +``` + +Cases to cover (all numeric outputs verified against the Python output +of the snippet above): + +| input | expected | +|---|---| +| `""` | 0 | +| `"0"` | 0 | +| `"500"` | 500 | +| `"500 Kč"` | 500 | +| `"500 CZK"` | 500 | +| `"1 500"` | 1500 | +| `"1500.00"` | 1500 | +| `"1 500.00"` | 1500 | +| `"1.500,00"` | 1500 | +| `"1500,5"` | 1500.5 | +| `"1.500.000"` | 1500000 | +| `"1.500"` | 1.5 *(heuristic — single dot = decimal)* | +| `"100,5 Kč"` | 100.5 | +| `"abc"` | 0, returns `ErrInvalidAmount` | +| `" "` | 0, returns `ErrInvalidAmount` *(or 0 nil — confirm against Python; trim leaves `""`, then `float("")` raises → Python returns 0; Go test will assert whichever Python actually produces)* | + +The `" "` row is the only one that needs the Python verification step +to settle — once verified, lock the behaviour in. + +Also add a "documentation example" assertion in the test that +`v, _ := ParseCZK(s)` recovers the Python silent-zero contract for +every garbage input, so we don't lose that property at the Go infer +call site. + +## Files to create + +- `go/internal/domain/money/money.go` — package + `ParseCZK` + `ErrInvalidAmount` +- `go/internal/domain/money/money_test.go` — table-driven tests + +No existing Go files need editing. + +## Verification + +```sh +cd go && go test ./internal/domain/money/... 
// ErrInvalidAmount is returned by ParseCZK when the input cannot be parsed.
var ErrInvalidAmount = errors.New("money: invalid CZK amount")

// ParseCZK parses a Czech-locale amount string and returns the value in CZK
// as a float64. It is a port of parse_czk_amount from
// scripts/infer_payments.py.
//
// Rules, applied in order:
//
//   - empty input → (0, nil)
//   - every "Kč" / "CZK" substring is removed (case-sensitive, like Python),
//     then surrounding whitespace is trimmed
//   - comma present → comma is the decimal separator, dots and spaces are
//     thousand separators ("1.500,00" → 1500)
//   - no comma, 2+ dots → dots and spaces are thousand separators
//     ("1.500.000" → 1500000)
//   - no comma, ≤1 dot → the dot is the decimal point and only spaces are
//     dropped ("1.500" → 1.5, "1 500.00" → 1500)
//
// Whatever remains is handed to strconv.ParseFloat. Any failure there,
// including an out-of-range magnitude, yields (0, ErrInvalidAmount). Callers
// that want Python's silent-zero behaviour can discard the error:
//
//	v, _ := ParseCZK(s)
//
// Hexadecimal float literals such as "0x1p4" are rejected explicitly:
// strconv.ParseFloat would accept them, but Python's float() does not.
// Special spellings accepted by strconv.ParseFloat ("inf", "infinity", "nan",
// in any case) are passed through unchanged.
func ParseCZK(s string) (float64, error) {
	if s == "" {
		return 0, nil
	}

	s = strings.ReplaceAll(s, "Kč", "")
	s = strings.ReplaceAll(s, "CZK", "")
	s = strings.TrimSpace(s)

	switch {
	case strings.ContainsRune(s, ','):
		// Comma wins: it is the decimal separator, so dots and spaces can
		// only be grouping characters.
		s = strings.ReplaceAll(s, ".", "")
		s = strings.ReplaceAll(s, " ", "")
		s = strings.ReplaceAll(s, ",", ".")
	case strings.Count(s, ".") > 1:
		// Several dots cannot all be decimal points: treat them as grouping.
		s = strings.ReplaceAll(s, ".", "")
		s = strings.ReplaceAll(s, " ", "")
	default:
		// At most one dot: keep it as the decimal point.
		s = strings.ReplaceAll(s, " ", "")
	}

	// No decimal float spelling (digits, sign, '.', 'e', '_', inf, infinity,
	// nan) contains an 'x', so its presence means Go hex-float syntax or
	// garbage. Either way the Python original would fail to parse it.
	if strings.ContainsAny(s, "xX") {
		return 0, ErrInvalidAmount
	}

	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, ErrInvalidAmount
	}
	return v, nil
}
python -c ' + // from infer_payments import parse_czk_amount + // for v in [None, "", "0", "500", "500 Kč", "500 CZK", + // "1 500", "1500.00", "1 500.00", + // "1.500,00", "1500,5", "1.500.000", + // "1.500", "abc", " ", "100,5 Kč"]: + // print(repr(v), "->", parse_czk_amount(v)) + // ' + tests := []struct { + name string + input string + want float64 + wantErr bool + }{ + {"empty string", "", 0, false}, + {"zero string", "0", 0, false}, + {"plain integer", "500", 500, false}, + {"with Kč suffix", "500 Kč", 500, false}, + {"with CZK suffix", "500 CZK", 500, false}, + {"space thousand sep", "1 500", 1500, false}, + {"dot decimal", "1500.00", 1500, false}, + {"space thousands dot decimal", "1 500.00", 1500, false}, + {"dot thousand comma decimal", "1.500,00", 1500, false}, + {"comma decimal no thousands", "1500,5", 1500.5, false}, + {"multiple dot thousand seps", "1.500.000", 1500000, false}, + {"single dot is decimal heuristic", "1.500", 1.5, false}, + {"comma decimal with Kč", "100,5 Kč", 100.5, false}, + {"garbage text", "abc", 0, true}, + {"spaces only", " ", 0, true}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got, err := ParseCZK(tc.input) + if (err != nil) != tc.wantErr { + t.Errorf("ParseCZK(%q) error = %v, wantErr %v", tc.input, err, tc.wantErr) + } + if got != tc.want { + t.Errorf("ParseCZK(%q) = %v, want %v", tc.input, got, tc.want) + } + }) + } +} + +// TestParseCZKSilentZero documents that discarding the error recovers Python's +// silent-zero behaviour for any garbage input. +func TestParseCZKSilentZero(t *testing.T) { + t.Parallel() + + for _, s := range []string{"abc", " ", "Kč", "CZK"} { + v, _ := ParseCZK(s) + if v != 0 { + t.Errorf("ParseCZK(%q) silent-zero: got %v, want 0", s, v) + } + } +}