Files
fuj-management/go/internal/domain/matching/name_variants.go
Jan Novak e596f0000e feat(go/M2.7-2.9): port domain/matching package
New go/internal/domain/matching package porting four helpers from
scripts/match_payments.py:

- BuildNameVariants: normalized ASCII variants from a member name (nickname
  in parens, last/first split, len<3 filtered); variants[0] is always the
  full base name — MatchMembers relies on this invariant.
- MatchMembers: auto/review confidence matching with an exact-name
  short-circuit pass that prevents nickname substrings (tov) from firing
  inside longer surnames (ottova); common-surname filter for review tier.
- FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional
  days supported)/YYYY-MM-DD passthrough/garbage → never errors.
- InferTransactionDetails: composes BuildNameVariants+MatchMembers+
  ParseMonthReferences; falls back to sender-only member match and
  date-derived month when text carries no signal.

21 table-driven tests; all expected values verified against live Python
on 2026-05-06. go-build, go-test, go-lint all clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-06 13:19:42 +02:00

60 lines
1.7 KiB
Go

package matching
import (
"fuj-management/go/internal/domain/czech"
"regexp"
"strings"
)
var (
	// nicknameStripRe matches a parenthesized group (possibly empty) together
	// with any whitespace directly around it, so the group can be replaced
	// wholesale when isolating the base name.
	nicknameStripRe = regexp.MustCompile(`\s*\([^)]*\)\s*`)

	// nicknameRe captures the non-empty text between the first pair of
	// parentheses; submatch 1 is the raw nickname.
	nicknameRe = regexp.MustCompile(`\(([^)]+)\)`)
)
// BuildNameVariants produces the normalized search keys for a member name.
//
// The name may carry a nickname in parentheses. Candidates are collected in
// this order:
//
//  1. the base name: the input with every parenthesized group removed,
//     trimmed, then passed through czech.Normalize;
//  2. the nickname: the text inside the first non-empty pair of parentheses,
//     passed through czech.Normalize (skipped when the result is empty);
//  3. the last whitespace-separated word of the normalized base name;
//  4. the first whitespace-separated word of the normalized base name.
//
// Items 3 and 4 are only added when the normalized base name has at least two
// words. Candidates shorter than 3 bytes are then discarded; duplicates are
// kept.
//
// Example: "František Vrbík (Štrúdl)" → ["frantisek vrbik", "strudl", "vrbik", "frantisek"]
//
// MatchMembers relies on variants[0] being the full normalized base name (no
// nickname) for its exact short-circuit pass. The length filter applies to the
// base name as well, so that holds only when the normalized base name is at
// least 3 bytes long.
//
// Ports scripts/match_payments.py _build_name_variants.
func BuildNameVariants(name string) []string {
	rawNick := ""
	if groups := nicknameRe.FindStringSubmatch(name); groups != nil {
		rawNick = groups[1]
	}

	stripped := nicknameStripRe.ReplaceAllString(name, " ")
	fullName := czech.Normalize(strings.TrimSpace(stripped))
	nick := czech.Normalize(rawNick)

	// At most four candidates: base name, nickname, last word, first word.
	candidates := make([]string, 0, 4)
	candidates = append(candidates, fullName)
	if nick != "" {
		candidates = append(candidates, nick)
	}
	if words := strings.Fields(fullName); len(words) > 1 {
		candidates = append(candidates, words[len(words)-1], words[0])
	}

	// Filter in place, reusing the candidates backing array.
	kept := candidates[:0]
	for i := range candidates {
		if len(candidates[i]) < 3 {
			continue
		}
		kept = append(kept, candidates[i])
	}
	return kept
}
// wordIn reports whether needle occurs in haystack delimited by ASCII word
// boundaries on both sides.
//
// A word boundary is a position where exactly one of the two neighbouring
// bytes is an ASCII word character ([0-9A-Za-z_]); the positions before the
// first byte and after the last byte count as non-word. This is the same rule
// as the `\b` assertion in Go's regexp syntax, so the result matches what
// matching `\b` + regexp.QuoteMeta(needle) + `\b` would give, without
// compiling a new regular expression on every call.
//
// Both needle and haystack must already be ASCII-folded (via czech.Normalize).
//
// Consequences of the boundary rule worth knowing:
//   - "tov" is not found in "ottova" (letters on both sides);
//   - "jan" is not found in "jan_novak" (underscore is a word character) but
//     is found in "jan-novak";
//   - an empty needle matches any haystack that contains a word character.
func wordIn(needle, haystack string) bool {
	// isWord reports whether haystack[i] is an ASCII word character;
	// out-of-range indexes stand for start/end of text and are non-word.
	isWord := func(i int) bool {
		if i < 0 || i >= len(haystack) {
			return false
		}
		c := haystack[i]
		return c == '_' ||
			('0' <= c && c <= '9') ||
			('a' <= c && c <= 'z') ||
			('A' <= c && c <= 'Z')
	}
	// boundary reports whether a word boundary lies just before haystack[i].
	boundary := func(i int) bool {
		return isWord(i-1) != isWord(i)
	}

	// Visit every occurrence (including overlapping ones) and accept the first
	// that is delimited by boundaries at both ends.
	for from := 0; from <= len(haystack); {
		i := strings.Index(haystack[from:], needle)
		if i < 0 {
			return false
		}
		start := from + i
		end := start + len(needle)
		if boundary(start) && boundary(end) {
			return true
		}
		from = start + 1
	}
	return false
}