New go/internal/domain/matching package porting four helpers from scripts/match_payments.py:

- BuildNameVariants: normalized ASCII variants from a member name (nickname in parens, last/first split, variants shorter than 3 characters filtered out); variants[0] is always the full base name — MatchMembers relies on this invariant.
- MatchMembers: auto/review confidence matching with an exact-name short-circuit pass that prevents nickname substrings (tov) from firing inside longer surnames (ottova); common-surname filter for the review tier.
- FormatDate: handles nil, empty string "", serial int/float64 (days since 1899-12-30, fractional days supported), YYYY-MM-DD passthrough, and garbage input → never errors.
- InferTransactionDetails: composes BuildNameVariants + MatchMembers + ParseMonthReferences; falls back to a sender-only member match and a date-derived month when the text carries no signal.

21 table-driven tests; all expected values verified against live Python on 2026-05-06. go-build, go-test, go-lint all clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
157 lines
4.6 KiB
Go
157 lines
4.6 KiB
Go
package matching
|
|
|
|
// Expected values verified against scripts/match_payments.py and
|
|
// tests/test_match_members.py on 2026-05-06:
|
|
//
|
|
// PYTHONPATH=scripts:. python3 -c '
|
|
// from match_payments import match_members
|
|
// MEMBERS = ["Henrietta Ottová", "Tomáš Němeček (Tov)", "František Vrbík (Štrúdl)", "Jana Nováková"]
|
|
// cases = [
|
|
// ("Henrietta Ottová (Heny): 04/2026", "full name guard"),
|
|
// ("platba ottova 04/2026", "ottova surname"),
|
|
// ("Henrietta Ottová a Tomáš Němeček 04/2026", "two full names"),
|
|
// ("Tov platba 04/2026", "nickname alone"),
|
|
// ("Henrietta Ottova 04/2026", "no diacritics"),
|
|
// ("Platba od Nemeček Tomas 04/2026", "reversed first+last"),
|
|
// ("vrbik clenske", "last name only review"),
|
|
// ("jana platba", "first name review"),
|
|
// ("neznamy platebce", "no match"),
|
|
// ]
|
|
// for text, label in cases: print(label + ":", match_members(text, MEMBERS))
|
|
// '
|
|
//
|
|
// Output:
|
|
//
|
|
// full name guard: [('Henrietta Ottová', 'auto')]
|
|
// ottova surname: [('Henrietta Ottová', 'review')]
|
|
// two full names: [('Henrietta Ottová', 'auto'), ('Tomáš Němeček (Tov)', 'auto')]
|
|
// nickname alone: [('Tomáš Němeček (Tov)', 'auto')]
|
|
// no diacritics: [('Henrietta Ottová', 'auto')]
|
|
// reversed first+last: [('Tomáš Němeček (Tov)', 'auto')]
|
|
// last name only review: [('František Vrbík (Štrúdl)', 'review')]
|
|
// first name review: [('Jana Nováková', 'review')]
|
|
// no match: []
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
// testMembers is the member roster shared by the tests in this file. It mixes
// plain names with names that carry a nickname in parentheses, and every entry
// contains diacritics. It is only read by the tests and must not be mutated,
// because the subtests that use it run in parallel.
var testMembers = []string{
	"Henrietta Ottová",
	"Tomáš Němeček (Tov)",
	"František Vrbík (Štrúdl)",
	"Jana Nováková",
}
|
|
|
|
func TestMatchMembers(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cases := []struct {
|
|
name string
|
|
text string
|
|
wantContains []string
|
|
wantExcludes []string
|
|
wantAllAuto bool
|
|
}{
|
|
{
|
|
// Short-circuit: full name matches → "tov" inside "ottova" must NOT fire
|
|
name: "full name in message returns only that member",
|
|
text: "Henrietta Ottová (Heny): 04/2026",
|
|
wantContains: []string{"Henrietta Ottová"},
|
|
wantExcludes: []string{"Tomáš Němeček (Tov)"},
|
|
wantAllAuto: true,
|
|
},
|
|
{
|
|
// "tov" is a substring of "ottova" — nickname must not match inside a surname
|
|
name: "nickname tov not matched inside ottova",
|
|
text: "platba ottova 04/2026",
|
|
wantExcludes: []string{"Tomáš Němeček (Tov)"},
|
|
wantAllAuto: false,
|
|
},
|
|
{
|
|
name: "two full names both auto",
|
|
text: "Henrietta Ottová a Tomáš Němeček 04/2026",
|
|
wantContains: []string{"Henrietta Ottová", "Tomáš Němeček (Tov)"},
|
|
wantAllAuto: true,
|
|
},
|
|
{
|
|
name: "nickname alone matches correctly",
|
|
text: "Tov platba 04/2026",
|
|
wantContains: []string{"Tomáš Němeček (Tov)"},
|
|
wantAllAuto: true,
|
|
},
|
|
{
|
|
name: "full name without diacritics auto",
|
|
text: "Henrietta Ottova 04/2026",
|
|
wantContains: []string{"Henrietta Ottová"},
|
|
wantExcludes: []string{"Tomáš Němeček (Tov)"},
|
|
wantAllAuto: true,
|
|
},
|
|
{
|
|
name: "first and last name reversed auto",
|
|
text: "Platba od Nemeček Tomas 04/2026",
|
|
wantContains: []string{"Tomáš Němeček (Tov)"},
|
|
wantAllAuto: true,
|
|
},
|
|
{
|
|
// Last name alone (len≥4, not a common surname) → review confidence
|
|
name: "last name only yields review",
|
|
text: "vrbik clenske",
|
|
wantContains: []string{"František Vrbík (Štrúdl)"},
|
|
wantAllAuto: false,
|
|
},
|
|
{
|
|
// First name alone (len≥3) → review confidence
|
|
name: "first name only yields review",
|
|
text: "jana platba",
|
|
wantContains: []string{"Jana Nováková"},
|
|
wantAllAuto: false,
|
|
},
|
|
{
|
|
name: "no match returns empty slice",
|
|
text: "neznamy platebce",
|
|
wantContains: nil,
|
|
wantAllAuto: false,
|
|
},
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
t.Parallel()
|
|
got := MatchMembers(tc.text, testMembers)
|
|
|
|
// Check required members are present
|
|
for _, want := range tc.wantContains {
|
|
found := false
|
|
for _, m := range got {
|
|
if m.Name == want {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
t.Errorf("MatchMembers(%q): want %q in result, got %v", tc.text, want, got)
|
|
}
|
|
}
|
|
|
|
// Check excluded members are absent
|
|
for _, exclude := range tc.wantExcludes {
|
|
for _, m := range got {
|
|
if m.Name == exclude {
|
|
t.Errorf("MatchMembers(%q): %q should not be in result, got %v", tc.text, exclude, got)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check all-auto constraint
|
|
if tc.wantAllAuto {
|
|
for _, m := range got {
|
|
if m.Confidence != ConfidenceAuto {
|
|
t.Errorf("MatchMembers(%q): expected all auto, got %v", tc.text, got)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|