feat(go/M2.7-2.9): port domain/matching package
New go/internal/domain/matching package porting three helpers from scripts/match_payments.py: - BuildNameVariants: normalized ASCII variants from a member name (nickname in parens, last/first split, len<3 filtered); variants[0] is always the full base name — MatchMembers relies on this invariant. - MatchMembers: auto/review confidence matching with an exact-name short-circuit pass that prevents nickname substrings (tov) from firing inside longer surnames (ottova); common-surname filter for review tier. - FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional days supported)/YYYY-MM-DD passthrough/garbage → never errors. - InferTransactionDetails: composes BuildNameVariants+MatchMembers+ ParseMonthReferences; falls back to sender-only member match and date-derived month when text carries no signal. 21 table-driven tests; all expected values verified against live Python on 2026-05-06. go-build, go-test, go-lint all clean. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
59
go/internal/domain/matching/name_variants.go
Normal file
59
go/internal/domain/matching/name_variants.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package matching
|
||||
|
||||
import (
|
||||
"fuj-management/go/internal/domain/czech"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
nicknameRe = regexp.MustCompile(`\(([^)]+)\)`)
|
||||
nicknameStripRe = regexp.MustCompile(`\s*\([^)]*\)\s*`)
|
||||
)
|
||||
|
||||
// BuildNameVariants returns searchable lowercase ASCII variants of a member name.
|
||||
//
|
||||
// Example: "František Vrbík (Štrúdl)" → ["frantisek vrbik", "strudl", "vrbik", "frantisek"]
|
||||
//
|
||||
// variants[0] is always the full normalized base name (no nickname). MatchMembers relies on
|
||||
// this invariant for the exact short-circuit pass. Variants shorter than 3 characters are
|
||||
// dropped.
|
||||
//
|
||||
// Ports scripts/match_payments.py _build_name_variants.
|
||||
func BuildNameVariants(name string) []string {
|
||||
var nickname string
|
||||
if m := nicknameRe.FindStringSubmatch(name); m != nil {
|
||||
nickname = m[1]
|
||||
}
|
||||
|
||||
base := strings.TrimSpace(nicknameStripRe.ReplaceAllString(name, " "))
|
||||
normalizedBase := czech.Normalize(base)
|
||||
normalizedNick := czech.Normalize(nickname)
|
||||
|
||||
variants := []string{normalizedBase}
|
||||
if normalizedNick != "" {
|
||||
variants = append(variants, normalizedNick)
|
||||
}
|
||||
|
||||
parts := strings.Fields(normalizedBase)
|
||||
if len(parts) >= 2 {
|
||||
variants = append(variants, parts[len(parts)-1]) // last name
|
||||
variants = append(variants, parts[0]) // first name
|
||||
}
|
||||
|
||||
filtered := variants[:0]
|
||||
for _, v := range variants {
|
||||
if len(v) >= 3 {
|
||||
filtered = append(filtered, v)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
// wordIn returns true if needle appears as a whole word in haystack.
|
||||
// Both needle and haystack must already be ASCII-folded (via czech.Normalize).
|
||||
func wordIn(needle, haystack string) bool {
|
||||
pattern := `\b` + regexp.QuoteMeta(needle) + `\b`
|
||||
matched, _ := regexp.MatchString(pattern, haystack)
|
||||
return matched
|
||||
}
|
||||
Reference in New Issue
Block a user