feat(go/M2.1): port czech.Normalize — NFKD + Mn strip + lowercase

Adds internal/domain/czech.Normalize, the first pure-domain function in the Go rewrite (M2 milestone). Matches Python czech_utils.normalize byte-for-byte: NFKD decompose via golang.org/x/text/unicode/norm, drop Mn-category combining marks (unicode.Mn, not IsMark, to match Python's unicodedata.combining() semantics), then strings.ToLower. Includes 13-case table-driven test; all inputs spot-checked against the Python implementation before locking. Adds golang.org/x/text v0.36.0 as first external dependency. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-05 22:23:40 +02:00
parent 91ac3b37cf
commit d9a61b338c
5 changed files with 215 additions and 0 deletions
--- a/go/go.mod
+++ b/go/go.mod
@@ -1,3 +1,5 @@
 module fuj-management/go

 go 1.26.1
+
+require golang.org/x/text v0.36.0
--- a/go/go.sum
+++ b/go/go.sum
@@ -0,0 +1,2 @@
+golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
+golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
--- a/go/internal/domain/czech/normalize.go
+++ b/go/internal/domain/czech/normalize.go
@@ -0,0 +1,26 @@
+package czech
+
+import (
+	"strings"
+	"unicode"
+
+	"golang.org/x/text/unicode/norm"
+)
+
+// Normalize strips diacritics and lowercases s.
+//
+// Matches Python: unicodedata.normalize("NFKD", s) then filter out
+// combining characters (unicode.Mn only — not Mc/Me, which have
+// combining class 0 in Python's unicodedata.combining()).
+func Normalize(s string) string {
+	decomposed := norm.NFKD.String(s)
+	var b strings.Builder
+	b.Grow(len(decomposed))
+	for _, r := range decomposed {
+		if unicode.In(r, unicode.Mn) {
+			continue
+		}
+		b.WriteRune(r)
+	}
+	return strings.ToLower(b.String())
+}
--- a/go/internal/domain/czech/normalize_test.go
+++ b/go/internal/domain/czech/normalize_test.go
@@ -0,0 +1,31 @@
+package czech
+
+import "testing"
+
+func TestNormalize(t *testing.T) {
+	cases := []struct { // each case: raw input and the Normalize output expected for it
+		in   string
+		want string
+	}{
+		{"Honza", "honza"},
+		{"žluťoučký", "zlutoucky"},
+		{"Příliš", "prilis"},
+		{"Dvořák", "dvorak"},
+		{"Růžena", "ruzena"},
+		{"Čeněk", "cenek"},
+		{"Kačer", "kacer"},
+		{"", ""},                                 // empty input
+		{"prilis", "prilis"},                     // idempotent
+		{"Jan Novák", "jan novak"},               // whitespace preserved
+		{"\u00e9", "e"},                          // precomposed é (NFC); escaped so tools cannot renormalize it
+		{"e\u0301", "e"},                         // decomposed e + combining acute (U+0301)
+		{"Ondřej Procházka", "ondrej prochazka"}, // realistic full name
+	}
+	// Errorf rather than Fatalf so a single run reports every failing case.
+	for _, tc := range cases {
+		got := Normalize(tc.in)
+		if got != tc.want {
+			t.Errorf("Normalize(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}