feat(go/M2.2): port czech.ParseMonthReferences
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
Three-pass regex parser matching python/czech_utils.py parse_month_references: 1. Numeric slash notation — "11+12/2025", "01/26"; 2-digit year → +2000 2. Dot notation — "12.2025" (4-digit year only) 3. Czech month names — range walk (listopad-leden wrap logic) then standalone with m≥10 → defaultYear-1 heuristic; longest-match alternation (sorted desc by name length) handles cervenec vs cerven 35 table-driven tests, all expected outputs verified against live Python on 2026-05-05 before locking. Plan at docs/plans/2026-05-05-2337-go-rewrite-m2-2-parse-month-references.md. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
244
go/internal/domain/czech/parse_month_references_test.go
Normal file
244
go/internal/domain/czech/parse_month_references_test.go
Normal file
@@ -0,0 +1,244 @@
|
||||
package czech
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseMonthReferences(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// All expected outputs verified against live Python implementation on 2026-05-05:
|
||||
// PYTHONPATH=scripts:. python -c 'from czech_utils import parse_month_references; print(parse_month_references("<input>", 2026))'
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
defaultYear int
|
||||
want []string
|
||||
}{
|
||||
{
|
||||
name: "empty",
|
||||
input: "",
|
||||
defaultYear: 2026,
|
||||
want: []string{},
|
||||
},
|
||||
{
|
||||
name: "numeric plus-split two months full year",
|
||||
input: "11+12/2025",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2025-12"},
|
||||
},
|
||||
{
|
||||
name: "numeric single month full year",
|
||||
input: "1/2026",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01"},
|
||||
},
|
||||
{
|
||||
name: "numeric 2-digit year",
|
||||
input: "01/26",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01"},
|
||||
},
|
||||
{
|
||||
name: "numeric plus-split with 2-digit year",
|
||||
input: "11+12/25",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2025-12"},
|
||||
},
|
||||
{
|
||||
name: "numeric three months sorted",
|
||||
input: "12+1+2/2026",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01", "2026-02", "2026-12"},
|
||||
},
|
||||
{
|
||||
name: "dot pattern",
|
||||
input: "12.2025",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12"},
|
||||
},
|
||||
{
|
||||
name: "dot pattern requires 4-digit year",
|
||||
input: "1.26",
|
||||
defaultYear: 2026,
|
||||
want: []string{},
|
||||
},
|
||||
{
|
||||
name: "standalone month below m10 threshold",
|
||||
input: "leden",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01"},
|
||||
},
|
||||
{
|
||||
name: "standalone month m10 heuristic",
|
||||
input: "prosinec",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12"},
|
||||
},
|
||||
{
|
||||
name: "declension prosince",
|
||||
input: "prosince",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12"},
|
||||
},
|
||||
{
|
||||
name: "declension lednu",
|
||||
input: "lednu",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01"},
|
||||
},
|
||||
{
|
||||
name: "standalone m10 boundary (rijen = October)",
|
||||
input: "rijen",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-10"},
|
||||
},
|
||||
{
|
||||
name: "standalone m9 just below boundary (zari = September)",
|
||||
input: "zari",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-09"},
|
||||
},
|
||||
{
|
||||
name: "range wrap Nov-Jan",
|
||||
input: "listopad-leden",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2025-12", "2026-01"},
|
||||
},
|
||||
{
|
||||
name: "range wrap starting at October",
|
||||
input: "rijen-leden",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-10", "2025-11", "2025-12", "2026-01"},
|
||||
},
|
||||
{
|
||||
name: "range no wrap",
|
||||
input: "unor-kveten",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-02", "2026-03", "2026-04", "2026-05"},
|
||||
},
|
||||
{
|
||||
name: "degenerate range same month",
|
||||
input: "leden-leden",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01"},
|
||||
},
|
||||
{
|
||||
name: "range spanning m10 — heuristic does NOT fire for range members",
|
||||
input: "unor-listopad",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-02", "2026-03", "2026-04", "2026-05", "2026-06", "2026-07", "2026-08", "2026-09", "2026-10", "2026-11"},
|
||||
},
|
||||
{
|
||||
name: "longest-match alternation cervenec beats cerven",
|
||||
input: "cervenec-srpen",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-07", "2026-08"},
|
||||
},
|
||||
{
|
||||
name: "range plus standalone — range excludes, dedup",
|
||||
input: "listopad-leden, prosinec",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2025-12", "2026-01"},
|
||||
},
|
||||
{
|
||||
name: "two standalones no range",
|
||||
input: "prosinec leden",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12", "2026-01"},
|
||||
},
|
||||
{
|
||||
name: "numeric plus range mix",
|
||||
input: "11+12/2025, leden-brezen",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2025-12", "2026-01", "2026-02", "2026-03"},
|
||||
},
|
||||
{
|
||||
name: "dedup across numeric and standalone passes",
|
||||
input: "11+12/25 a listopad",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2025-12"},
|
||||
},
|
||||
{
|
||||
name: "no digits before slash — standalone fires instead",
|
||||
input: "prosince/2025",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12"},
|
||||
},
|
||||
{
|
||||
name: "range with trailing slash-year — numeric fails, range wins",
|
||||
input: "listopad-prosinec/2025",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-11", "2026-12"},
|
||||
},
|
||||
{
|
||||
name: "dot pattern only — numeric matches but month out of 1-12 range",
|
||||
input: "01.2026 / 02.2026",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01", "2026-02"},
|
||||
},
|
||||
{
|
||||
name: "leading slash — numeric matches at second slash",
|
||||
input: "/12/2025",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12"},
|
||||
},
|
||||
{
|
||||
name: "uppercase input normalized",
|
||||
input: "PROSINEC",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12"},
|
||||
},
|
||||
{
|
||||
name: "diacritics stripped by Normalize",
|
||||
input: "Žluťoučký prosinec",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-12"},
|
||||
},
|
||||
{
|
||||
name: "diacritics in range with spaces around dash",
|
||||
input: "Únor - květen",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-02", "2026-03", "2026-04", "2026-05"},
|
||||
},
|
||||
{
|
||||
name: "natural language mixed with numeric and standalone",
|
||||
input: "platba 11/2025 a leden",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2026-01"},
|
||||
},
|
||||
{
|
||||
name: "English month name not recognized",
|
||||
input: "December",
|
||||
defaultYear: 2026,
|
||||
want: []string{},
|
||||
},
|
||||
{
|
||||
name: "duplicate input deduped",
|
||||
input: "11+12/2025 11+12/2025",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2025-11", "2025-12"},
|
||||
},
|
||||
{
|
||||
name: "trailing year without separator ignored",
|
||||
input: "leden 2026",
|
||||
defaultYear: 2026,
|
||||
want: []string{"2026-01"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := ParseMonthReferences(tc.input, tc.defaultYear)
|
||||
if got == nil {
|
||||
got = []string{}
|
||||
}
|
||||
if !reflect.DeepEqual(got, tc.want) {
|
||||
t.Errorf("ParseMonthReferences(%q, %d)\n got %v\n want %v",
|
||||
tc.input, tc.defaultYear, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user