feat(go/M2.1): port czech.Normalize — NFKD + Mn strip + lowercase
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s

Adds internal/domain/czech.Normalize, the first pure-domain function in
the Go rewrite (M2 milestone). Matches Python czech_utils.normalize byte-
for-byte: NFKD decompose via golang.org/x/text/unicode/norm, drop Mn-
category combining marks (unicode.Mn, not IsMark, to match Python's
unicodedata.combining() semantics), then strings.ToLower.

Includes 13-case table-driven test; all inputs spot-checked against the
Python implementation before locking. Adds golang.org/x/text v0.36.0 as
first external dependency.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 22:23:40 +02:00
parent 91ac3b37cf
commit d9a61b338c
5 changed files with 215 additions and 0 deletions

View File

@@ -1,3 +1,5 @@
module fuj-management/go
go 1.26.1
require golang.org/x/text v0.36.0

2
go/go.sum Normal file
View File

@@ -0,0 +1,2 @@
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=

View File

@@ -0,0 +1,26 @@
package czech
import (
"strings"
"unicode"
"golang.org/x/text/unicode/norm"
)
// Normalize strips diacritics and lowercases s.
//
// Matches Python: unicodedata.normalize("NFKD", s) then filter out
// combining characters (unicode.Mn only — not Mc/Me, which have
// combining class 0 in Python's unicodedata.combining()).
func Normalize(s string) string {
decomposed := norm.NFKD.String(s)
var b strings.Builder
b.Grow(len(decomposed))
for _, r := range decomposed {
if unicode.In(r, unicode.Mn) {
continue
}
b.WriteRune(r)
}
return strings.ToLower(b.String())
}

View File

@@ -0,0 +1,31 @@
package czech
import "testing"
func TestNormalize(t *testing.T) {
cases := []struct {
in string
want string
}{
{"Honza", "honza"},
{"žluťoučký", "zlutoucky"},
{"Příliš", "prilis"},
{"Dvořák", "dvorak"},
{"Růžena", "ruzena"},
{"Čeněk", "cenek"},
{"Kačer", "kacer"},
{"", ""},
{"prilis", "prilis"}, // idempotent
{"Jan Novák", "jan novak"}, // whitespace preserved
{"é", "e"}, // precomposed é (NFC)
{"é", "e"}, // decomposed e + combining acute
{"Ondřej Procházka", "ondrej prochazka"}, // realistic full name
}
for _, tc := range cases {
got := Normalize(tc.in)
if got != tc.want {
t.Errorf("Normalize(%q) = %q, want %q", tc.in, got, tc.want)
}
}
}