feat(go/M2.7-2.9): port domain/matching package

New go/internal/domain/matching package porting four helpers from
scripts/match_payments.py:

- BuildNameVariants: normalized ASCII variants from a member name (nickname
  in parens, last/first split, len<3 filtered); variants[0] is always the
  full base name — MatchMembers relies on this invariant.
- MatchMembers: auto/review confidence matching with an exact-name
  short-circuit pass that prevents nickname substrings (tov) from firing
  inside longer surnames (ottova); common-surname filter for review tier.
- FormatDate: nil/empty/""/serial int/float64 (since 1899-12-30, fractional
  days supported)/YYYY-MM-DD passthrough/garbage → never errors.
- InferTransactionDetails: composes BuildNameVariants+MatchMembers+
  ParseMonthReferences; falls back to sender-only member match and
  date-derived month when text carries no signal.

21 table-driven tests; all expected values verified against live Python
on 2026-05-06. go-build, go-test, go-lint all clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-06 13:19:42 +02:00
parent c2bffed1b8
commit e596f0000e
12 changed files with 835 additions and 3 deletions

View File

@@ -0,0 +1,2 @@
// Package matching ports name/member matching from scripts/match_payments.py.
package matching

View File

@@ -0,0 +1,41 @@
package matching
import (
"fmt"
"strings"
"time"
)
// sheetsEpoch is day zero of the Google Sheets serial-date scale (1899-12-30 UTC).
var sheetsEpoch = time.Date(1899, 12, 30, 0, 0, 0, 0, time.UTC)

// FormatDate normalizes a date value from Google Sheets into a string.
//
//   - nil and blank strings yield "".
//   - int, int64 and float64 values are read as Sheets serial days since
//     1899-12-30 and rendered as "YYYY-MM-DD"; the fractional part of a
//     float64 is the time of day.
//   - Any other string (including an already formatted "YYYY-MM-DD") is
//     returned with surrounding whitespace trimmed.
//   - Any other type is rendered with fmt.Sprint and trimmed.
//
// FormatDate never returns an error.
//
// Ports scripts/match_payments.py format_date.
func FormatDate(val any) string {
	// Whole days are applied with AddDate rather than as one time.Duration:
	// a Duration is an int64 nanosecond count (about 292 years), so
	// serial*24h overflows for dates past roughly the year 2192.
	render := func(wholeDays int, dayFraction float64) string {
		timeOfDay := time.Duration(dayFraction * float64(24*time.Hour))
		return sheetsEpoch.AddDate(0, 0, wholeDays).Add(timeOfDay).Format("2006-01-02")
	}

	switch v := val.(type) {
	case nil:
		return ""
	case int:
		return render(v, 0)
	case int64:
		return render(int(v), 0)
	case float64:
		whole := int(v) // truncates toward zero; the remainder keeps v's sign
		return render(whole, v-float64(whole))
	case string:
		// Blank input collapses to ""; everything else passes through trimmed.
		return strings.TrimSpace(v)
	default:
		return strings.TrimSpace(fmt.Sprint(v))
	}
}

View File

@@ -0,0 +1,49 @@
package matching
// Expected values verified against scripts/match_payments.py on 2026-05-06:
//
// PYTHONPATH=scripts:. python3 -c '
// from match_payments import format_date
// for v in [None, "", 44197, 44197.5, "2026-04-15", "garbage", " 2026-04-15 "]:
// print(repr(format_date(v)))
// '
//
// Output:
//
// ''
// ''
// '2021-01-01'
// '2021-01-01'
// '2026-04-15'
// 'garbage'
// '2026-04-15'
import "testing"
// TestFormatDate checks FormatDate against the reference outputs recorded
// from the Python implementation (listed in the header comment of this file).
func TestFormatDate(t *testing.T) {
	t.Parallel()

	type scenario struct {
		label  string
		value  any
		expect string
	}
	table := []scenario{
		{label: "nil", value: nil, expect: ""},
		{label: "empty string", value: "", expect: ""},
		{label: "serial int", value: int(44197), expect: "2021-01-01"},
		{label: "serial float fractional", value: float64(44197.5), expect: "2021-01-01"},
		{label: "already formatted", value: "2026-04-15", expect: "2026-04-15"},
		{label: "garbage string", value: "garbage", expect: "garbage"},
		{label: "padded date string trimmed", value: " 2026-04-15 ", expect: "2026-04-15"},
	}

	for i := range table {
		// Per-iteration copy: the parallel subtest runs after the loop moves on.
		sc := table[i]
		t.Run(sc.label, func(t *testing.T) {
			t.Parallel()
			if actual := FormatDate(sc.value); actual != sc.expect {
				t.Errorf("FormatDate(%v) = %q, want %q", sc.value, actual, sc.expect)
			}
		})
	}
}

View File

@@ -0,0 +1,89 @@
package matching
import (
"fmt"
"fuj-management/go/internal/domain/czech"
"time"
)
// Transaction is the subset of a payment row used by InferTransactionDetails.
// Date accepts string ("YYYY-MM-DD"), float64 (Sheets serial), or int — matching
// the heterogeneous types returned by the Sheets API and the FIO scraper.
type Transaction struct {
// Sender is part of the member search text and is also matched on its own
// when the combined text yields no member.
Sender string
// Message is part of both the member search text and the month search text.
Message string
// UserID is part of both the member search text and the month search text.
UserID string
// Date is consulted only when the text carries no month reference. The
// month fallback understands "YYYY-MM-DD" strings and int, int64 or float64
// serial days; nil, "" and any other type produce no month.
Date any
}
// InferredDetails is the result of InferTransactionDetails.
type InferredDetails struct {
// Members holds the member matches found for the transaction.
Members []Match
// Months holds the inferred months; InferTransactionDetails always sets a
// non-nil slice. Date-derived entries use the "YYYY-MM" layout.
Months []string
// SearchText is the "sender message user_id" string used for member matching.
SearchText string
}
// InferTransactionDetails works out which member(s) and month(s) a payment
// belongs to.
//
// Members are searched in "sender message user_id"; when that finds nobody the
// sender alone is tried. Months are searched in "message user_id" only (the
// sender is left out, as in Python); when that finds nothing the month is
// derived from tx.Date (Sheets serial or YYYY-MM-DD). Months in the result is
// never nil.
//
// defaultYear is handed to czech.ParseMonthReferences (Python defaulted to the
// current year; pass time.Now().Year(), or a fixed year in deterministic tests).
//
// Ports scripts/match_payments.py infer_transaction_details.
func InferTransactionDetails(tx Transaction, memberNames []string, defaultYear int) InferredDetails {
	result := InferredDetails{
		SearchText: fmt.Sprintf("%s %s %s", tx.Sender, tx.Message, tx.UserID),
	}

	result.Members = MatchMembers(result.SearchText, memberNames)
	result.Months = czech.ParseMonthReferences(tx.Message+" "+tx.UserID, defaultYear)

	// Fallback 1: nobody matched in the combined text, so try the sender alone.
	if len(result.Members) == 0 {
		result.Members = MatchMembers(tx.Sender, memberNames)
	}

	// Fallback 2: no month in the text, so take it from the transaction date.
	hasDate := tx.Date != nil && tx.Date != ""
	if hasDate && len(result.Months) == 0 {
		if fromDate := inferMonthFromDate(tx.Date); fromDate != "" {
			result.Months = []string{fromDate}
		}
	}

	// Hand back an empty slice rather than nil.
	if result.Months == nil {
		result.Months = []string{}
	}
	return result
}
// inferMonthFromDate converts a date value to "YYYY-MM" for the month fallback.
//
// int, int64 and float64 values are read as serial days counted from
// sheetsEpoch; strings must parse as "YYYY-MM-DD". Anything else — including
// "" and unparseable strings — yields "", matching Python's bare except pass.
func inferMonthFromDate(val any) string {
	// Whole days are applied with AddDate rather than as one time.Duration:
	// a Duration is an int64 nanosecond count (about 292 years), so
	// serial*24h overflows for dates past roughly the year 2192.
	monthOf := func(wholeDays int, dayFraction float64) string {
		timeOfDay := time.Duration(dayFraction * float64(24*time.Hour))
		return sheetsEpoch.AddDate(0, 0, wholeDays).Add(timeOfDay).Format("2006-01")
	}

	switch v := val.(type) {
	case int:
		return monthOf(v, 0)
	case int64:
		return monthOf(int(v), 0)
	case float64:
		whole := int(v) // truncates toward zero; the remainder keeps v's sign
		return monthOf(whole, v-float64(whole))
	case string:
		// An empty string fails to parse as well, so it needs no separate guard.
		parsed, err := time.Parse("2006-01-02", v)
		if err != nil {
			return ""
		}
		return parsed.Format("2006-01")
	default:
		return ""
	}
}

View File

@@ -0,0 +1,108 @@
package matching
// Expected values verified against scripts/match_payments.py on 2026-05-06:
//
// PYTHONPATH=scripts:. python3 << 'EOF'
// from match_payments import infer_transaction_details
// MEMBERS = ["Tomáš Němeček (Tov)", "Jana Nováková"]
// cases = [
// ({"sender":"Tomas Nemecek","message":"clenske 04/2026","user_id":"","date":"2026-04-15"}, "full match"),
// ({"sender":"Tomas Nemecek","message":"","user_id":"","date":"2026-04-15"}, "sender fallback month"),
// ({"sender":"Jana Novakova","message":"","user_id":"","date":44197}, "serial int date"),
// ({"sender":"neznamy","message":"","user_id":"","date":""}, "no match"),
// ({"sender":"Tomas Nemecek","message":"","user_id":"","date":44197.5}, "serial float date"),
// ]
// for tx, label in cases:
// r = infer_transaction_details(tx, MEMBERS)
// print(label + ": members=" + repr(r["members"]) + " months=" + repr(r["months"]) + " search_text=" + repr(r["search_text"]))
// EOF
//
// Output:
//
// full match: members=[('Tomáš Němeček (Tov)', 'auto')] months=['2026-04'] search_text='Tomas Nemecek clenske 04/2026 '
// sender fallback month: members=[('Tomáš Němeček (Tov)', 'auto')] months=['2026-04'] search_text='Tomas Nemecek '
// serial int date: members=[('Jana Nováková', 'auto')] months=['2021-01'] search_text='Jana Novakova '
// no match: members=[] months=[] search_text='neznamy '
// serial float date: members=[('Tomáš Němeček (Tov)', 'auto')] months=['2021-01'] search_text='Tomas Nemecek '
import (
"reflect"
"testing"
)
// inferMembers is the member roster passed to InferTransactionDetails by every
// case in TestInferTransactionDetails.
var inferMembers = []string{"Tomáš Němeček (Tov)", "Jana Nováková"}
func TestInferTransactionDetails(t *testing.T) {
t.Parallel()
cases := []struct {
name string
tx Transaction
defaultYear int
wantMembers []Match
wantMonths []string
wantSearchText string
}{
{
name: "full match — members and months from search text",
tx: Transaction{Sender: "Tomas Nemecek", Message: "clenske 04/2026", UserID: "", Date: "2026-04-15"},
defaultYear: 2026,
wantMembers: []Match{{Name: "Tomáš Němeček (Tov)", Confidence: ConfidenceAuto}},
wantMonths: []string{"2026-04"},
// Python: sender + " " + message + " " + user_id (no trim)
wantSearchText: "Tomas Nemecek clenske 04/2026 ",
},
{
// months not in message → fall back to date string
name: "months fall back to date string",
tx: Transaction{Sender: "Tomas Nemecek", Message: "", UserID: "", Date: "2026-04-15"},
defaultYear: 2026,
wantMembers: []Match{{Name: "Tomáš Němeček (Tov)", Confidence: ConfidenceAuto}},
wantMonths: []string{"2026-04"},
wantSearchText: "Tomas Nemecek ",
},
{
// months fall back to Sheets serial int date
name: "months fall back to serial int date",
tx: Transaction{Sender: "Jana Novakova", Message: "", UserID: "", Date: int(44197)},
defaultYear: 2026,
wantMembers: []Match{{Name: "Jana Nováková", Confidence: ConfidenceAuto}},
wantMonths: []string{"2021-01"},
wantSearchText: "Jana Novakova ",
},
{
// months fall back to Sheets serial float64 date
name: "months fall back to serial float date",
tx: Transaction{Sender: "Tomas Nemecek", Message: "", UserID: "", Date: float64(44197.5)},
defaultYear: 2026,
wantMembers: []Match{{Name: "Tomáš Němeček (Tov)", Confidence: ConfidenceAuto}},
wantMonths: []string{"2021-01"},
wantSearchText: "Tomas Nemecek ",
},
{
name: "no match — both slices empty not nil",
tx: Transaction{Sender: "neznamy", Message: "", UserID: "", Date: ""},
defaultYear: 2026,
wantMembers: []Match{},
wantMonths: []string{},
wantSearchText: "neznamy ",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got := InferTransactionDetails(tc.tx, inferMembers, tc.defaultYear)
if !reflect.DeepEqual(got.Members, tc.wantMembers) {
t.Errorf("Members\n got %v\n want %v", got.Members, tc.wantMembers)
}
if !reflect.DeepEqual(got.Months, tc.wantMonths) {
t.Errorf("Months\n got %v\n want %v", got.Months, tc.wantMonths)
}
if got.SearchText != tc.wantSearchText {
t.Errorf("SearchText\n got %q\n want %q", got.SearchText, tc.wantSearchText)
}
})
}
}

View File

@@ -0,0 +1,131 @@
package matching
import (
"fuj-management/go/internal/domain/czech"
"strings"
)
// Confidence indicates how certain a member match is.
type Confidence string
// Confidence levels assigned by MatchMembers.
const (
// ConfidenceAuto marks a match that is considered reliable.
ConfidenceAuto Confidence = "auto"
// ConfidenceReview marks a match that needs human verification.
ConfidenceReview Confidence = "review"
)
// Match pairs a canonical member name with the confidence of the match.
type Match struct {
// Name is the member name exactly as it was passed to MatchMembers
// (including any "(nickname)" suffix).
Name string
// Confidence is ConfidenceAuto or ConfidenceReview.
Confidence Confidence
}
// commonSurnames lists normalized (lowercase ASCII) surnames for which
// MatchMembers skips the last-name-only review match.
var commonSurnames = map[string]bool{
"novak": true,
"novakova": true,
"prach": true,
}
// MatchMembers reports which members are mentioned in text, each tagged
// "auto" (reliable) or "review" (needs human verification). The result is
// never nil and follows the order of memberNames.
//
// The algorithm is ported verbatim from scripts/match_payments.py match_members:
//  1. Exact short-circuit: when any member's full normalized name occurs as
//     whole words in the normalized text, ONLY those members are returned, as
//     auto. This keeps nickname "tov" from firing inside surname "ottova".
//  2. Otherwise each member gets the first rule that fires: full-name
//     substring, then first and last name both present as words (any order),
//     then nickname as a whole word. Each of these yields auto.
//  3. Failing that: last name (len≥4, not a common surname), then first name
//     (len≥3), or the single part of a one-part name (len≥4). Each yields review.
//  4. If anything matched as auto, every review match is dropped.
func MatchMembers(text string, memberNames []string) []Match {
	haystack := czech.Normalize(text)

	// Pass 1: whole-word hits on the full name win outright.
	var exact []Match
	for _, member := range memberNames {
		if vs := BuildNameVariants(member); len(vs) > 0 && vs[0] != "" && wordIn(vs[0], haystack) {
			exact = append(exact, Match{Name: member, Confidence: ConfidenceAuto})
		}
	}
	if len(exact) > 0 {
		return exact
	}

	present := func(word string) bool { return wordIn(word, haystack) }

	// classify applies the auto rules and then the review rules to one member;
	// the first rule that fires decides. ok is false when nothing fires.
	classify := func(member string) (conf Confidence, ok bool) {
		base := ""
		if vs := BuildNameVariants(member); len(vs) > 0 {
			base = vs[0]
		}
		words := strings.Fields(base)
		first, last := "", ""
		if len(words) > 0 {
			first, last = words[0], words[len(words)-1]
		}

		// Auto tier.
		switch {
		case base != "" && strings.Contains(haystack, base):
			return ConfidenceAuto, true
		case len(words) >= 2 && present(first) && present(last):
			return ConfidenceAuto, true
		}
		if groups := nicknameRe.FindStringSubmatch(member); groups != nil {
			if nick := czech.Normalize(groups[1]); nick != "" && present(nick) {
				return ConfidenceAuto, true
			}
		}

		// Review tier.
		switch {
		case len(words) >= 2:
			if len(last) >= 4 && !commonSurnames[last] && present(last) {
				return ConfidenceReview, true
			}
			if len(first) >= 3 && present(first) {
				return ConfidenceReview, true
			}
		case len(words) == 1:
			if len(first) >= 4 && present(first) {
				return ConfidenceReview, true
			}
		}
		return "", false
	}

	// Pass 2 + 3: fuzzy matching, remembering whether any auto match turned up.
	found := []Match{}
	anyAuto := false
	for _, member := range memberNames {
		conf, ok := classify(member)
		if !ok {
			continue
		}
		found = append(found, Match{Name: member, Confidence: conf})
		if conf == ConfidenceAuto {
			anyAuto = true
		}
	}
	if !anyAuto {
		return found
	}

	// Final filter: an auto match makes every review match redundant.
	autos := found[:0]
	for _, m := range found {
		if m.Confidence == ConfidenceAuto {
			autos = append(autos, m)
		}
	}
	return autos
}

View File

@@ -0,0 +1,156 @@
package matching
// Expected values verified against scripts/match_payments.py and
// tests/test_match_members.py on 2026-05-06:
//
// PYTHONPATH=scripts:. python3 -c '
// from match_payments import match_members
// MEMBERS = ["Henrietta Ottová", "Tomáš Němeček (Tov)", "František Vrbík (Štrúdl)", "Jana Nováková"]
// cases = [
// ("Henrietta Ottová (Heny): 04/2026", "full name guard"),
// ("platba ottova 04/2026", "ottova surname"),
// ("Henrietta Ottová a Tomáš Němeček 04/2026", "two full names"),
// ("Tov platba 04/2026", "nickname alone"),
// ("Henrietta Ottova 04/2026", "no diacritics"),
// ("Platba od Nemeček Tomas 04/2026", "reversed first+last"),
// ("vrbik clenske", "last name only review"),
// ("jana platba", "first name review"),
// ("neznamy platebce", "no match"),
// ]
// for text, label in cases: print(label + ":", match_members(text, MEMBERS))
// '
//
// Output:
//
// full name guard: [('Henrietta Ottová', 'auto')]
// ottova surname: [('Henrietta Ottová', 'review')]
// two full names: [('Henrietta Ottová', 'auto'), ('Tomáš Němeček (Tov)', 'auto')]
// nickname alone: [('Tomáš Němeček (Tov)', 'auto')]
// no diacritics: [('Henrietta Ottová', 'auto')]
// reversed first+last: [('Tomáš Němeček (Tov)', 'auto')]
// last name only review: [('František Vrbík (Štrúdl)', 'review')]
// first name review: [('Jana Nováková', 'review')]
// no match: []
import (
"testing"
)
// testMembers is the member roster passed to MatchMembers by every case in
// TestMatchMembers.
var testMembers = []string{
"Henrietta Ottová",
"Tomáš Němeček (Tov)",
"František Vrbík (Štrúdl)",
"Jana Nováková",
}
func TestMatchMembers(t *testing.T) {
t.Parallel()
cases := []struct {
name string
text string
wantContains []string
wantExcludes []string
wantAllAuto bool
}{
{
// Short-circuit: full name matches → "tov" inside "ottova" must NOT fire
name: "full name in message returns only that member",
text: "Henrietta Ottová (Heny): 04/2026",
wantContains: []string{"Henrietta Ottová"},
wantExcludes: []string{"Tomáš Němeček (Tov)"},
wantAllAuto: true,
},
{
// "tov" is a substring of "ottova" — nickname must not match inside a surname
name: "nickname tov not matched inside ottova",
text: "platba ottova 04/2026",
wantExcludes: []string{"Tomáš Němeček (Tov)"},
wantAllAuto: false,
},
{
name: "two full names both auto",
text: "Henrietta Ottová a Tomáš Němeček 04/2026",
wantContains: []string{"Henrietta Ottová", "Tomáš Němeček (Tov)"},
wantAllAuto: true,
},
{
name: "nickname alone matches correctly",
text: "Tov platba 04/2026",
wantContains: []string{"Tomáš Němeček (Tov)"},
wantAllAuto: true,
},
{
name: "full name without diacritics auto",
text: "Henrietta Ottova 04/2026",
wantContains: []string{"Henrietta Ottová"},
wantExcludes: []string{"Tomáš Němeček (Tov)"},
wantAllAuto: true,
},
{
name: "first and last name reversed auto",
text: "Platba od Nemeček Tomas 04/2026",
wantContains: []string{"Tomáš Němeček (Tov)"},
wantAllAuto: true,
},
{
// Last name alone (len≥4, not a common surname) → review confidence
name: "last name only yields review",
text: "vrbik clenske",
wantContains: []string{"František Vrbík (Štrúdl)"},
wantAllAuto: false,
},
{
// First name alone (len≥3) → review confidence
name: "first name only yields review",
text: "jana platba",
wantContains: []string{"Jana Nováková"},
wantAllAuto: false,
},
{
name: "no match returns empty slice",
text: "neznamy platebce",
wantContains: nil,
wantAllAuto: false,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got := MatchMembers(tc.text, testMembers)
// Check required members are present
for _, want := range tc.wantContains {
found := false
for _, m := range got {
if m.Name == want {
found = true
break
}
}
if !found {
t.Errorf("MatchMembers(%q): want %q in result, got %v", tc.text, want, got)
}
}
// Check excluded members are absent
for _, exclude := range tc.wantExcludes {
for _, m := range got {
if m.Name == exclude {
t.Errorf("MatchMembers(%q): %q should not be in result, got %v", tc.text, exclude, got)
}
}
}
// Check all-auto constraint
if tc.wantAllAuto {
for _, m := range got {
if m.Confidence != ConfidenceAuto {
t.Errorf("MatchMembers(%q): expected all auto, got %v", tc.text, got)
}
}
}
})
}
}

View File

@@ -0,0 +1,59 @@
package matching
import (
"fuj-management/go/internal/domain/czech"
"regexp"
"strings"
)
var (
// nicknameRe captures the non-empty text inside a pair of parentheses,
// e.g. the nickname in "Tomáš Němeček (Tov)".
nicknameRe = regexp.MustCompile(`\(([^)]+)\)`)
// nicknameStripRe matches a parenthesized group (possibly empty) together
// with the whitespace around it; BuildNameVariants replaces it with a space.
nicknameStripRe = regexp.MustCompile(`\s*\([^)]*\)\s*`)
)
// BuildNameVariants lists the normalized (lowercase ASCII) strings under which
// a member may be searched for.
//
// Example: "František Vrbík (Štrúdl)" → ["frantisek vrbik", "strudl", "vrbik", "frantisek"]
//
// Candidates are produced in a fixed order — full base name (nickname
// removed), nickname, last name, first name — and every candidate shorter than
// 3 bytes is dropped. The result is never nil.
//
// MatchMembers treats element 0 as the full base name for its exact
// short-circuit pass. NOTE(review): that holds only while the base name is at
// least 3 bytes long; a shorter base name is filtered out like any other
// candidate and the next survivor moves to index 0.
//
// Ports scripts/match_payments.py _build_name_variants.
func BuildNameVariants(name string) []string {
	rawNick := ""
	if groups := nicknameRe.FindStringSubmatch(name); groups != nil {
		rawNick = groups[1]
	}

	withoutNick := nicknameStripRe.ReplaceAllString(name, " ")
	baseName := czech.Normalize(strings.TrimSpace(withoutNick))
	nick := czech.Normalize(rawNick)

	candidates := make([]string, 0, 4)
	candidates = append(candidates, baseName)
	if nick != "" {
		candidates = append(candidates, nick)
	}
	if words := strings.Fields(baseName); len(words) >= 2 {
		// Last name first, then first name.
		candidates = append(candidates, words[len(words)-1], words[0])
	}

	kept := make([]string, 0, len(candidates))
	for _, c := range candidates {
		if len(c) < 3 {
			continue
		}
		kept = append(kept, c)
	}
	return kept
}
// wordIn reports whether needle appears as a whole word in haystack, i.e.
// whether the pattern `\b` + needle + `\b` would match: some occurrence of
// needle must have an ASCII word boundary immediately before and after it.
// Both needle and haystack must already be ASCII-folded (via czech.Normalize).
//
// The check is done with a plain substring scan instead of compiling a new
// regular expression on every call (and discarding its compile error).
func wordIn(needle, haystack string) bool {
	// isWord mirrors the regexp class \w: ASCII letters, digits and underscore.
	isWord := func(c byte) bool {
		switch {
		case c == '_', '0' <= c && c <= '9', 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
			return true
		}
		return false
	}
	// boundaryAt mirrors `\b`: exactly one neighbour of position i is a word
	// byte, with the start and end of haystack counting as non-word.
	boundaryAt := func(i int) bool {
		before := i > 0 && isWord(haystack[i-1])
		after := i < len(haystack) && isWord(haystack[i])
		return before != after
	}

	for from := 0; from <= len(haystack); {
		rel := strings.Index(haystack[from:], needle)
		if rel < 0 {
			return false
		}
		start := from + rel
		if boundaryAt(start) && boundaryAt(start+len(needle)) {
			return true
		}
		// Step one byte so overlapping occurrences are examined too.
		from = start + 1
	}
	return false
}

View File

@@ -0,0 +1,62 @@
package matching
// Expected values verified against scripts/match_payments.py on 2026-05-06:
//
// PYTHONPATH=scripts:. python3 -c '
// from match_payments import _build_name_variants
// for n in ["František Vrbík (Štrúdl)", "Tov (St)", "Jana", " Petr Novák ( Jenda ) "]:
// print(repr(n), "->", _build_name_variants(n))
// '
//
// Output:
//
// 'František Vrbík (Štrúdl)' -> ['frantisek vrbik', 'strudl', 'vrbik', 'frantisek']
// 'Tov (St)' -> ['tov']
// 'Jana' -> ['jana']
// ' Petr Novák ( Jenda ) ' -> ['petr novak', ' jenda ', 'novak', 'petr']
import (
"reflect"
"testing"
)
// TestBuildNameVariants checks BuildNameVariants against the reference outputs
// recorded from the Python implementation (listed in the header comment of
// this file), including order and the short-variant filter.
func TestBuildNameVariants(t *testing.T) {
	t.Parallel()

	table := []struct {
		label  string
		member string
		expect []string
	}{
		{
			label:  "full name with nickname",
			member: "František Vrbík (Štrúdl)",
			expect: []string{"frantisek vrbik", "strudl", "vrbik", "frantisek"},
		},
		{
			label:  "nickname too short filtered out",
			member: "Tov (St)",
			expect: []string{"tov"},
		},
		{
			label:  "single-part name no nickname",
			member: "Jana",
			expect: []string{"jana"},
		},
		{
			label:  "extra whitespace inside parens preserved by normalize",
			member: " Petr Novák ( Jenda ) ",
			expect: []string{"petr novak", " jenda ", "novak", "petr"},
		},
	}

	for i := range table {
		// Per-iteration copy: the parallel subtest runs after the loop moves on.
		row := table[i]
		t.Run(row.label, func(t *testing.T) {
			t.Parallel()
			actual := BuildNameVariants(row.member)
			if !reflect.DeepEqual(actual, row.expect) {
				t.Errorf("BuildNameVariants(%q)\n got %q\n want %q", row.member, actual, row.expect)
			}
		})
	}
}