feat(go/M2.7-2.9): port domain/matching package
New go/internal/domain/matching package porting four helpers from scripts/match_payments.py: - BuildNameVariants: normalized ASCII variants from a member name (nickname in parens, last/first split, len<3 filtered); variants[0] is always the full base name — MatchMembers relies on this invariant. - MatchMembers: auto/review confidence matching with an exact-name short-circuit pass that prevents nickname substrings (tov) from firing inside longer surnames (ottova); common-surname filter for review tier. - FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional days supported)/YYYY-MM-DD passthrough/garbage → never errors. - InferTransactionDetails: composes BuildNameVariants+MatchMembers+ParseMonthReferences; falls back to sender-only member match and date-derived month when text carries no signal. 21 table-driven tests; all expected values verified against live Python on 2026-05-06. go-build, go-test, go-lint all clean. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
156
go/internal/domain/matching/match_members_test.go
Normal file
156
go/internal/domain/matching/match_members_test.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package matching
|
||||
|
||||
// Expected values verified against scripts/match_payments.py and
|
||||
// tests/test_match_members.py on 2026-05-06:
|
||||
//
|
||||
// PYTHONPATH=scripts:. python3 -c '
|
||||
// from match_payments import match_members
|
||||
// MEMBERS = ["Henrietta Ottová", "Tomáš Němeček (Tov)", "František Vrbík (Štrúdl)", "Jana Nováková"]
|
||||
// cases = [
|
||||
// ("Henrietta Ottová (Heny): 04/2026", "full name guard"),
|
||||
// ("platba ottova 04/2026", "ottova surname"),
|
||||
// ("Henrietta Ottová a Tomáš Němeček 04/2026", "two full names"),
|
||||
// ("Tov platba 04/2026", "nickname alone"),
|
||||
// ("Henrietta Ottova 04/2026", "no diacritics"),
|
||||
// ("Platba od Nemeček Tomas 04/2026", "reversed first+last"),
|
||||
// ("vrbik clenske", "last name only review"),
|
||||
// ("jana platba", "first name review"),
|
||||
// ("neznamy platebce", "no match"),
|
||||
// ]
|
||||
// for text, label in cases: print(label + ":", match_members(text, MEMBERS))
|
||||
// '
|
||||
//
|
||||
// Output:
|
||||
//
|
||||
// full name guard: [('Henrietta Ottová', 'auto')]
|
||||
// ottova surname: [('Henrietta Ottová', 'review')]
|
||||
// two full names: [('Henrietta Ottová', 'auto'), ('Tomáš Němeček (Tov)', 'auto')]
|
||||
// nickname alone: [('Tomáš Němeček (Tov)', 'auto')]
|
||||
// no diacritics: [('Henrietta Ottová', 'auto')]
|
||||
// reversed first+last: [('Tomáš Němeček (Tov)', 'auto')]
|
||||
// last name only review: [('František Vrbík (Štrúdl)', 'review')]
|
||||
// first name review: [('Jana Nováková', 'review')]
|
||||
// no match: []
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// testMembers is the member roster passed to MatchMembers by TestMatchMembers.
// It is the same list as MEMBERS in the Python reference run quoted in the
// header comment: two entries carry a parenthesised nickname, two do not.
var testMembers = []string{
	"Henrietta Ottová",
	"Tomáš Němeček (Tov)",
	"František Vrbík (Štrúdl)",
	"Jana Nováková",
}
|
||||
|
||||
func TestMatchMembers(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
text string
|
||||
wantContains []string
|
||||
wantExcludes []string
|
||||
wantAllAuto bool
|
||||
}{
|
||||
{
|
||||
// Short-circuit: full name matches → "tov" inside "ottova" must NOT fire
|
||||
name: "full name in message returns only that member",
|
||||
text: "Henrietta Ottová (Heny): 04/2026",
|
||||
wantContains: []string{"Henrietta Ottová"},
|
||||
wantExcludes: []string{"Tomáš Němeček (Tov)"},
|
||||
wantAllAuto: true,
|
||||
},
|
||||
{
|
||||
// "tov" is a substring of "ottova" — nickname must not match inside a surname
|
||||
name: "nickname tov not matched inside ottova",
|
||||
text: "platba ottova 04/2026",
|
||||
wantExcludes: []string{"Tomáš Němeček (Tov)"},
|
||||
wantAllAuto: false,
|
||||
},
|
||||
{
|
||||
name: "two full names both auto",
|
||||
text: "Henrietta Ottová a Tomáš Němeček 04/2026",
|
||||
wantContains: []string{"Henrietta Ottová", "Tomáš Němeček (Tov)"},
|
||||
wantAllAuto: true,
|
||||
},
|
||||
{
|
||||
name: "nickname alone matches correctly",
|
||||
text: "Tov platba 04/2026",
|
||||
wantContains: []string{"Tomáš Němeček (Tov)"},
|
||||
wantAllAuto: true,
|
||||
},
|
||||
{
|
||||
name: "full name without diacritics auto",
|
||||
text: "Henrietta Ottova 04/2026",
|
||||
wantContains: []string{"Henrietta Ottová"},
|
||||
wantExcludes: []string{"Tomáš Němeček (Tov)"},
|
||||
wantAllAuto: true,
|
||||
},
|
||||
{
|
||||
name: "first and last name reversed auto",
|
||||
text: "Platba od Nemeček Tomas 04/2026",
|
||||
wantContains: []string{"Tomáš Němeček (Tov)"},
|
||||
wantAllAuto: true,
|
||||
},
|
||||
{
|
||||
// Last name alone (len≥4, not a common surname) → review confidence
|
||||
name: "last name only yields review",
|
||||
text: "vrbik clenske",
|
||||
wantContains: []string{"František Vrbík (Štrúdl)"},
|
||||
wantAllAuto: false,
|
||||
},
|
||||
{
|
||||
// First name alone (len≥3) → review confidence
|
||||
name: "first name only yields review",
|
||||
text: "jana platba",
|
||||
wantContains: []string{"Jana Nováková"},
|
||||
wantAllAuto: false,
|
||||
},
|
||||
{
|
||||
name: "no match returns empty slice",
|
||||
text: "neznamy platebce",
|
||||
wantContains: nil,
|
||||
wantAllAuto: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := MatchMembers(tc.text, testMembers)
|
||||
|
||||
// Check required members are present
|
||||
for _, want := range tc.wantContains {
|
||||
found := false
|
||||
for _, m := range got {
|
||||
if m.Name == want {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("MatchMembers(%q): want %q in result, got %v", tc.text, want, got)
|
||||
}
|
||||
}
|
||||
|
||||
// Check excluded members are absent
|
||||
for _, exclude := range tc.wantExcludes {
|
||||
for _, m := range got {
|
||||
if m.Name == exclude {
|
||||
t.Errorf("MatchMembers(%q): %q should not be in result, got %v", tc.text, exclude, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check all-auto constraint
|
||||
if tc.wantAllAuto {
|
||||
for _, m := range got {
|
||||
if m.Confidence != ConfidenceAuto {
|
||||
t.Errorf("MatchMembers(%q): expected all auto, got %v", tc.text, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user