New go/internal/domain/matching package porting four helpers from scripts/match_payments.py: - BuildNameVariants: normalized ASCII variants from a member name (nickname in parens, last/first split, len<3 filtered); variants[0] is always the full base name — MatchMembers relies on this invariant. - MatchMembers: auto/review confidence matching with an exact-name short-circuit pass that prevents nickname substrings (tov) from firing inside longer surnames (ottova); common-surname filter for review tier. - FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional days supported)/YYYY-MM-DD passthrough/garbage → never errors. - InferTransactionDetails: composes BuildNameVariants+MatchMembers+ ParseMonthReferences; falls back to sender-only member match and date-derived month when text carries no signal. 21 table-driven tests; all expected values verified against live Python on 2026-05-06. go-build, go-test, go-lint all clean. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
60 lines
1.7 KiB
Go
60 lines
1.7 KiB
Go
package matching
|
|
|
|
import (
|
|
"fuj-management/go/internal/domain/czech"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
var (
|
|
// nicknameRe matches a parenthesised group with at least one character inside
// and captures that inner text as submatch 1, e.g. "(Štrúdl)" → "Štrúdl".
nicknameRe = regexp.MustCompile(`\(([^)]+)\)`)
|
|
// nicknameStripRe matches a parenthesised group (possibly empty) together with
// any whitespace directly around it, so the whole group can be cut out of a name.
nicknameStripRe = regexp.MustCompile(`\s*\([^)]*\)\s*`)
|
|
)
|
|
|
|
// BuildNameVariants returns searchable lowercase ASCII variants of a member name.
|
|
//
|
|
// Example: "František Vrbík (Štrúdl)" → ["frantisek vrbik", "strudl", "vrbik", "frantisek"]
|
|
//
|
|
// variants[0] is always the full normalized base name (no nickname). MatchMembers relies on
|
|
// this invariant for the exact short-circuit pass. Variants shorter than 3 characters are
|
|
// dropped.
|
|
//
|
|
// Ports scripts/match_payments.py _build_name_variants.
|
|
func BuildNameVariants(name string) []string {
|
|
var nickname string
|
|
if m := nicknameRe.FindStringSubmatch(name); m != nil {
|
|
nickname = m[1]
|
|
}
|
|
|
|
base := strings.TrimSpace(nicknameStripRe.ReplaceAllString(name, " "))
|
|
normalizedBase := czech.Normalize(base)
|
|
normalizedNick := czech.Normalize(nickname)
|
|
|
|
variants := []string{normalizedBase}
|
|
if normalizedNick != "" {
|
|
variants = append(variants, normalizedNick)
|
|
}
|
|
|
|
parts := strings.Fields(normalizedBase)
|
|
if len(parts) >= 2 {
|
|
variants = append(variants, parts[len(parts)-1]) // last name
|
|
variants = append(variants, parts[0]) // first name
|
|
}
|
|
|
|
filtered := variants[:0]
|
|
for _, v := range variants {
|
|
if len(v) >= 3 {
|
|
filtered = append(filtered, v)
|
|
}
|
|
}
|
|
return filtered
|
|
}
|
|
|
|
// wordIn reports whether needle occurs in haystack as a whole word, i.e. with
// a word boundary immediately before and immediately after the occurrence.
//
// A word boundary is a position where exactly one of the two neighbouring bytes
// is an ASCII word character ([0-9A-Za-z_]); the start and end of haystack
// count as non-word. This is the same definition the regular expression
// `\b` + QuoteMeta(needle) + `\b` uses, but it is evaluated with a plain
// substring scan so that no pattern has to be compiled on every call.
//
// Both needle and haystack must already be ASCII-folded (via czech.Normalize).
// The comparison is case-sensitive. As with the regular expression, an empty
// needle matches whenever haystack contains at least one word boundary.
func wordIn(needle, haystack string) bool {
	// isWord reports whether haystack has an ASCII word character at index i;
	// out-of-range indexes are treated as non-word.
	isWord := func(i int) bool {
		if i < 0 || i >= len(haystack) {
			return false
		}
		c := haystack[i]
		return c == '_' ||
			('0' <= c && c <= '9') ||
			('a' <= c && c <= 'z') ||
			('A' <= c && c <= 'Z')
	}
	// boundary reports whether a word boundary lies between bytes i-1 and i.
	boundary := func(i int) bool { return isWord(i-1) != isWord(i) }

	// Try every occurrence of needle; an earlier occurrence embedded in a longer
	// word (e.g. "tov" in "ottova") must not hide a later stand-alone one.
	for from := 0; from <= len(haystack); {
		rel := strings.Index(haystack[from:], needle)
		if rel < 0 {
			return false
		}
		start := from + rel
		if boundary(start) && boundary(start+len(needle)) {
			return true
		}
		from = start + 1
	}
	return false
}
|