Files
fuj-management/go/internal/domain/matching/match_members.go
Jan Novak e596f0000e feat(go/M2.7-2.9): port domain/matching package
New go/internal/domain/matching package porting four helpers from
scripts/match_payments.py:

- BuildNameVariants: normalized ASCII variants from a member name (nickname
  in parens, last/first split, len<3 filtered); variants[0] is always the
  full base name — MatchMembers relies on this invariant.
- MatchMembers: auto/review confidence matching with an exact-name
  short-circuit pass that prevents nickname substrings (tov) from firing
  inside longer surnames (ottova); common-surname filter for review tier.
- FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional
  days supported)/YYYY-MM-DD passthrough/garbage → never errors.
- InferTransactionDetails: composes BuildNameVariants+MatchMembers+
  ParseMonthReferences; falls back to sender-only member match and
  date-derived month when text carries no signal.

21 table-driven tests; all expected values verified against live Python
on 2026-05-06. go-build, go-test, go-lint all clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-06 13:19:42 +02:00

132 lines
3.7 KiB
Go

package matching
import (
"fuj-management/go/internal/domain/czech"
"strings"
)
// Confidence indicates how certain a member match is. It is a string-backed
// type whose defined values are ConfidenceAuto and ConfidenceReview.
type Confidence string
// Confidence levels attached to each Match returned by MatchMembers.
const (
// ConfidenceAuto marks a match that is considered reliable.
ConfidenceAuto Confidence = "auto"
// ConfidenceReview marks a match that needs human verification.
ConfidenceReview Confidence = "review"
)
// Match pairs a canonical member name with the confidence of the match.
type Match struct {
// Name is the member name exactly as it was supplied in the memberNames
// argument of MatchMembers (not the normalized form).
Name string
// Confidence is the tier at which the member was matched.
Confidence Confidence
}
// commonSurnames is the set of surnames treated as common. MatchMembers looks
// up the last whitespace-separated part of a member's normalized full name
// here and, when present, skips the review-tier last-name check for that
// member. Keys are lowercase ASCII, matching the normalized name parts they
// are compared against.
var commonSurnames = map[string]bool{
"novak": true,
"novakova": true,
"prach": true,
}
// MatchMembers scans text for mentions of the given members and reports each
// hit together with a Confidence: ConfidenceAuto for reliable hits and
// ConfidenceReview for hits a human should verify. The result is never nil;
// an empty slice is returned when nothing matches. Match.Name always carries
// the entry from memberNames unchanged.
//
// Ported from match_members in scripts/match_payments.py. The steps are:
//
//  1. Exact short-circuit. Every member whose full normalized name
//     (BuildNameVariants(name)[0]) is found by wordIn in the normalized text
//     is collected as auto. If at least one such member exists, only these
//     are returned. This keeps a nickname such as "tov" from matching inside
//     a surname such as "ottova".
//  2. Auto tier, first rule that fires wins for each member: the full name
//     occurs as a substring; or the first and last name parts are both found
//     by wordIn (any order); or the nickname captured by nicknameRe is found
//     by wordIn.
//  3. Review tier, tried only when no auto rule fired for the member: the
//     last name (len >= 4, not in commonSurnames), then the first name
//     (len >= 3); a single-part name needs len >= 4.
//  4. If any member reached the auto tier, all review matches are dropped.
func MatchMembers(text string, memberNames []string) []Match {
	haystack := czech.Normalize(text)

	// Step 1: whole-word hit on the full base name wins outright.
	var exact []Match
	for _, member := range memberNames {
		vs := BuildNameVariants(member)
		if len(vs) > 0 && vs[0] != "" && wordIn(vs[0], haystack) {
			exact = append(exact, Match{Name: member, Confidence: ConfidenceAuto})
		}
	}
	if len(exact) != 0 {
		return exact
	}

	// nicknameMentioned reports whether the nickname captured from the raw
	// member name by nicknameRe is present in the text after normalization.
	nicknameMentioned := func(member string) bool {
		groups := nicknameRe.FindStringSubmatch(member)
		if groups == nil {
			return false
		}
		nick := czech.Normalize(groups[1])
		return nick != "" && wordIn(nick, haystack)
	}

	// partiallyMentioned implements the review tier on the whitespace-split
	// parts of the normalized full name.
	partiallyMentioned := func(words []string) bool {
		switch len(words) {
		case 0:
			return false
		case 1:
			return len(words[0]) >= 4 && wordIn(words[0], haystack)
		}
		first, last := words[0], words[len(words)-1]
		if len(last) >= 4 && !commonSurnames[last] && wordIn(last, haystack) {
			return true
		}
		return len(first) >= 3 && wordIn(first, haystack)
	}

	// Steps 2 and 3: classify every member. Auto and review hits are kept in
	// separate slices so that step 4 becomes a simple choice between them.
	var autos, reviews []Match
	for _, member := range memberNames {
		var base string
		if vs := BuildNameVariants(member); len(vs) > 0 {
			base = vs[0]
		}
		words := strings.Fields(base)

		// Cases are evaluated top to bottom and the first true one wins.
		switch {
		case base != "" && strings.Contains(haystack, base):
			autos = append(autos, Match{Name: member, Confidence: ConfidenceAuto})
		case len(words) >= 2 && wordIn(words[0], haystack) && wordIn(words[len(words)-1], haystack):
			autos = append(autos, Match{Name: member, Confidence: ConfidenceAuto})
		case nicknameMentioned(member):
			autos = append(autos, Match{Name: member, Confidence: ConfidenceAuto})
		case partiallyMentioned(words):
			reviews = append(reviews, Match{Name: member, Confidence: ConfidenceReview})
		}
	}

	// Step 4: any auto hit suppresses every review hit.
	if len(autos) != 0 {
		return autos
	}
	if reviews == nil {
		return []Match{}
	}
	return reviews
}