New go/internal/domain/matching package porting four helpers from scripts/match_payments.py: - BuildNameVariants: normalized ASCII variants from a member name (nickname in parens, last/first split, len<3 filtered); variants[0] is always the full base name — MatchMembers relies on this invariant. - MatchMembers: auto/review confidence matching with an exact-name short-circuit pass that prevents nickname substrings (tov) from firing inside longer surnames (ottova); common-surname filter for review tier. - FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional days supported)/YYYY-MM-DD passthrough/garbage → never errors. - InferTransactionDetails: composes BuildNameVariants+MatchMembers+ ParseMonthReferences; falls back to sender-only member match and date-derived month when text carries no signal. 21 table-driven tests; all expected values verified against live Python on 2026-05-06. go-build, go-test, go-lint all clean. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
132 lines
3.7 KiB
Go
132 lines
3.7 KiB
Go
package matching
|
|
|
|
import (
|
|
"fuj-management/go/internal/domain/czech"
|
|
"strings"
|
|
)
|
|
|
|
// Confidence indicates how certain a member match is.
//
// It is a string type; the two values used by this package are declared
// below as ConfidenceAuto and ConfidenceReview.
type Confidence string

const (
	// ConfidenceAuto marks a match considered reliable.
	ConfidenceAuto Confidence = "auto"
	// ConfidenceReview marks a match that needs human verification.
	ConfidenceReview Confidence = "review"
)
|
|
|
|
// Match pairs a canonical member name with the confidence of the match.
|
|
type Match struct {
|
|
Name string
|
|
Confidence Confidence
|
|
}
|
|
|
|
// commonSurnames is the set of normalized surnames that MatchMembers refuses
// to use for a last-name-only review match. Lookups of absent keys yield
// false, so the map can be indexed directly as a membership test.
var commonSurnames = map[string]bool{
	"novak":    true,
	"novakova": true,
	"prach":    true,
}
|
|
|
|
// MatchMembers finds members mentioned in text and returns them with a
|
|
// confidence level of "auto" (reliable) or "review" (needs human verification).
|
|
//
|
|
// Algorithm (ported verbatim from scripts/match_payments.py match_members):
|
|
// 1. Exact short-circuit: if any member's full normalized name appears as whole
|
|
// words in normalize(text), return ONLY those matches as auto. This prevents
|
|
// nickname "tov" from matching inside surname "ottova".
|
|
// 2. Per-member first-match-wins: full-name substring → first+last both present
|
|
// (any order) → nickname whole-word. Each yields auto.
|
|
// 3. Review tier: last name (len≥4, not a common surname) → first name (len≥3)
|
|
// → single-part name (len≥4). Each yields review.
|
|
// 4. Final filter: if any auto exists, drop all review.
|
|
func MatchMembers(text string, memberNames []string) []Match {
|
|
normalizedText := czech.Normalize(text)
|
|
|
|
// Pass 1: exact short-circuit
|
|
var exactMatches []Match
|
|
for _, name := range memberNames {
|
|
variants := BuildNameVariants(name)
|
|
if len(variants) == 0 {
|
|
continue
|
|
}
|
|
fullName := variants[0]
|
|
if fullName != "" && wordIn(fullName, normalizedText) {
|
|
exactMatches = append(exactMatches, Match{Name: name, Confidence: ConfidenceAuto})
|
|
}
|
|
}
|
|
if len(exactMatches) > 0 {
|
|
return exactMatches
|
|
}
|
|
|
|
// Pass 2 + 3: fuzzy matching
|
|
var matches []Match
|
|
for _, name := range memberNames {
|
|
variants := BuildNameVariants(name)
|
|
fullName := ""
|
|
if len(variants) > 0 {
|
|
fullName = variants[0]
|
|
}
|
|
parts := strings.Fields(fullName)
|
|
|
|
// Auto tier
|
|
if fullName != "" && strings.Contains(normalizedText, fullName) {
|
|
matches = append(matches, Match{Name: name, Confidence: ConfidenceAuto})
|
|
continue
|
|
}
|
|
if len(parts) >= 2 {
|
|
if wordIn(parts[0], normalizedText) && wordIn(parts[len(parts)-1], normalizedText) {
|
|
matches = append(matches, Match{Name: name, Confidence: ConfidenceAuto})
|
|
continue
|
|
}
|
|
}
|
|
// Nickname check
|
|
if m := nicknameRe.FindStringSubmatch(name); m != nil {
|
|
nick := czech.Normalize(m[1])
|
|
if nick != "" && wordIn(nick, normalizedText) {
|
|
matches = append(matches, Match{Name: name, Confidence: ConfidenceAuto})
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Review tier
|
|
if len(parts) >= 2 {
|
|
lastName := parts[len(parts)-1]
|
|
firstName := parts[0]
|
|
if len(lastName) >= 4 && !commonSurnames[lastName] && wordIn(lastName, normalizedText) {
|
|
matches = append(matches, Match{Name: name, Confidence: ConfidenceReview})
|
|
continue
|
|
}
|
|
if len(firstName) >= 3 && wordIn(firstName, normalizedText) {
|
|
matches = append(matches, Match{Name: name, Confidence: ConfidenceReview})
|
|
continue
|
|
}
|
|
} else if len(parts) == 1 {
|
|
if len(parts[0]) >= 4 && wordIn(parts[0], normalizedText) {
|
|
matches = append(matches, Match{Name: name, Confidence: ConfidenceReview})
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
|
|
// Final filter: drop review if any auto exists
|
|
hasAuto := false
|
|
for _, m := range matches {
|
|
if m.Confidence == ConfidenceAuto {
|
|
hasAuto = true
|
|
break
|
|
}
|
|
}
|
|
if hasAuto {
|
|
filtered := matches[:0]
|
|
for _, m := range matches {
|
|
if m.Confidence == ConfidenceAuto {
|
|
filtered = append(filtered, m)
|
|
}
|
|
}
|
|
return filtered
|
|
}
|
|
|
|
if matches == nil {
|
|
return []Match{}
|
|
}
|
|
return matches
|
|
}
|