Files
fuj-management/go/internal/domain/matching/match_members_test.go
Jan Novak e596f0000e feat(go/M2.7-2.9): port domain/matching package
New go/internal/domain/matching package porting four helpers from
scripts/match_payments.py:

- BuildNameVariants: normalized ASCII variants from a member name (nickname
  in parens, last/first split, len<3 filtered); variants[0] is always the
  full base name — MatchMembers relies on this invariant.
- MatchMembers: auto/review confidence matching with an exact-name
  short-circuit pass that prevents nickname substrings (tov) from firing
  inside longer surnames (ottova); common-surname filter for review tier.
- FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional
  days supported)/YYYY-MM-DD passthrough/garbage → never errors.
- InferTransactionDetails: composes BuildNameVariants+MatchMembers+
  ParseMonthReferences; falls back to sender-only member match and
  date-derived month when text carries no signal.

21 table-driven tests; all expected values verified against live Python
on 2026-05-06. go-build, go-test, go-lint all clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-06 13:19:42 +02:00

157 lines
4.6 KiB
Go

package matching
// Expected values verified against scripts/match_payments.py and
// tests/test_match_members.py on 2026-05-06:
//
// PYTHONPATH=scripts:. python3 -c '
// from match_payments import match_members
// MEMBERS = ["Henrietta Ottová", "Tomáš Němeček (Tov)", "František Vrbík (Štrúdl)", "Jana Nováková"]
// cases = [
// ("Henrietta Ottová (Heny): 04/2026", "full name guard"),
// ("platba ottova 04/2026", "ottova surname"),
// ("Henrietta Ottová a Tomáš Němeček 04/2026", "two full names"),
// ("Tov platba 04/2026", "nickname alone"),
// ("Henrietta Ottova 04/2026", "no diacritics"),
// ("Platba od Nemeček Tomas 04/2026", "reversed first+last"),
// ("vrbik clenske", "last name only review"),
// ("jana platba", "first name review"),
// ("neznamy platebce", "no match"),
// ]
// for text, label in cases: print(label + ":", match_members(text, MEMBERS))
// '
//
// Output:
//
// full name guard: [('Henrietta Ottová', 'auto')]
// ottova surname: [('Henrietta Ottová', 'review')]
// two full names: [('Henrietta Ottová', 'auto'), ('Tomáš Němeček (Tov)', 'auto')]
// nickname alone: [('Tomáš Němeček (Tov)', 'auto')]
// no diacritics: [('Henrietta Ottová', 'auto')]
// reversed first+last: [('Tomáš Němeček (Tov)', 'auto')]
// last name only review: [('František Vrbík (Štrúdl)', 'review')]
// first name review: [('Jana Nováková', 'review')]
// no match: []
import (
"testing"
)
// testMembers is the fixed member roster passed to MatchMembers by the tests
// in this file. It mirrors the MEMBERS list used for the Python reference run
// documented at the top of the file.
var testMembers = []string{
	// Plain name without a nickname; its ASCII surname "ottova" contains "tov".
	"Henrietta Ottová",
	// Name with the nickname "Tov" in parentheses.
	"Tomáš Němeček (Tov)",
	// Name with a parenthesised nickname; exercised by the surname-only case.
	"František Vrbík (Štrúdl)",
	// Plain name; exercised by the first-name-only case.
	"Jana Nováková",
}
// TestMatchMembers runs MatchMembers over the shared testMembers roster and
// pins the complete expected result of every case: which members are returned
// and whether each one carries ConfidenceAuto.
//
// The expectations mirror the Python reference output recorded in the comment
// at the top of this file. Asserting the exact result set (rather than only
// "contains"/"excludes" lists) keeps the review-tier and no-match cases from
// passing vacuously: a review match that is silently promoted to auto, an
// unexpected extra member, or a non-empty result for unmatched text all fail.
func TestMatchMembers(t *testing.T) {
	t.Parallel()

	// expect pairs a member name with the confidence tier it must be matched
	// at: auto == true means ConfidenceAuto, auto == false means any other
	// confidence (the reference output reports these as "review").
	type expect struct {
		name string
		auto bool
	}

	cases := []struct {
		name string
		text string
		want []expect // complete expected result set; order is not asserted
	}{
		{
			// Short-circuit: full name matches → "tov" inside "ottova" must NOT fire,
			// so Tomáš Němeček (Tov) must be absent from the result.
			name: "full name in message returns only that member",
			text: "Henrietta Ottová (Heny): 04/2026",
			want: []expect{{name: "Henrietta Ottová", auto: true}},
		},
		{
			// "tov" is a substring of "ottova" — the nickname must not match inside
			// a surname; only the surname owner is reported, at review confidence.
			name: "nickname tov not matched inside ottova",
			text: "platba ottova 04/2026",
			want: []expect{{name: "Henrietta Ottová", auto: false}},
		},
		{
			name: "two full names both auto",
			text: "Henrietta Ottová a Tomáš Němeček 04/2026",
			want: []expect{
				{name: "Henrietta Ottová", auto: true},
				{name: "Tomáš Němeček (Tov)", auto: true},
			},
		},
		{
			name: "nickname alone matches correctly",
			text: "Tov platba 04/2026",
			want: []expect{{name: "Tomáš Němeček (Tov)", auto: true}},
		},
		{
			name: "full name without diacritics auto",
			text: "Henrietta Ottova 04/2026",
			want: []expect{{name: "Henrietta Ottová", auto: true}},
		},
		{
			name: "first and last name reversed auto",
			text: "Platba od Nemeček Tomas 04/2026",
			want: []expect{{name: "Tomáš Němeček (Tov)", auto: true}},
		},
		{
			// Last name alone (len≥4, not a common surname) → review confidence
			name: "last name only yields review",
			text: "vrbik clenske",
			want: []expect{{name: "František Vrbík (Štrúdl)", auto: false}},
		},
		{
			// First name alone (len≥3) → review confidence
			name: "first name only yields review",
			text: "jana platba",
			want: []expect{{name: "Jana Nováková", auto: false}},
		},
		{
			name: "no match returns empty slice",
			text: "neznamy platebce",
			want: nil,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got := MatchMembers(tc.text, testMembers)

			// The length check is what rejects unexpected extra members and
			// enforces emptiness for the no-match case.
			if len(got) != len(tc.want) {
				t.Errorf("MatchMembers(%q): want %d result(s), got %d: %v",
					tc.text, len(tc.want), len(got), got)
			}

			// Every expected member must be present at the expected tier.
			for _, w := range tc.want {
				found := false
				for _, m := range got {
					if m.Name != w.name {
						continue
					}
					found = true
					if gotAuto := m.Confidence == ConfidenceAuto; gotAuto != w.auto {
						t.Errorf("MatchMembers(%q): %q auto = %t, want %t, got %v",
							tc.text, w.name, gotAuto, w.auto, got)
					}
					break
				}
				if !found {
					t.Errorf("MatchMembers(%q): want %q in result, got %v", tc.text, w.name, got)
				}
			}
		})
	}
}