feat(go/M2.1): port czech.Normalize — NFKD + Mn strip + lowercase
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
Adds internal/domain/czech.Normalize, the first pure-domain function in the Go rewrite (M2 milestone). Matches Python czech_utils.normalize byte- for-byte: NFKD decompose via golang.org/x/text/unicode/norm, drop Mn- category combining marks (unicode.Mn, not IsMark, to match Python's unicodedata.combining() semantics), then strings.ToLower. Includes 13-case table-driven test; all inputs spot-checked against the Python implementation before locking. Adds golang.org/x/text v0.36.0 as first external dependency. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
module fuj-management/go
|
||||
|
||||
go 1.26.1
|
||||
|
||||
require golang.org/x/text v0.36.0
|
||||
|
||||
2
go/go.sum
Normal file
2
go/go.sum
Normal file
@@ -0,0 +1,2 @@
|
||||
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
|
||||
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
|
||||
26
go/internal/domain/czech/normalize.go
Normal file
26
go/internal/domain/czech/normalize.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package czech
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// Normalize strips diacritics and lowercases s.
|
||||
//
|
||||
// Matches Python: unicodedata.normalize("NFKD", s) then filter out
|
||||
// combining characters (unicode.Mn only — not Mc/Me, which have
|
||||
// combining class 0 in Python's unicodedata.combining()).
|
||||
func Normalize(s string) string {
|
||||
decomposed := norm.NFKD.String(s)
|
||||
var b strings.Builder
|
||||
b.Grow(len(decomposed))
|
||||
for _, r := range decomposed {
|
||||
if unicode.In(r, unicode.Mn) {
|
||||
continue
|
||||
}
|
||||
b.WriteRune(r)
|
||||
}
|
||||
return strings.ToLower(b.String())
|
||||
}
|
||||
31
go/internal/domain/czech/normalize_test.go
Normal file
31
go/internal/domain/czech/normalize_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package czech
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestNormalize(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"Honza", "honza"},
|
||||
{"žluťoučký", "zlutoucky"},
|
||||
{"Příliš", "prilis"},
|
||||
{"Dvořák", "dvorak"},
|
||||
{"Růžena", "ruzena"},
|
||||
{"Čeněk", "cenek"},
|
||||
{"Kačer", "kacer"},
|
||||
{"", ""},
|
||||
{"prilis", "prilis"}, // idempotent
|
||||
{"Jan Novák", "jan novak"}, // whitespace preserved
|
||||
{"é", "e"}, // precomposed é (NFC)
|
||||
{"é", "e"}, // decomposed e + combining acute
|
||||
{"Ondřej Procházka", "ondrej prochazka"}, // realistic full name
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
got := Normalize(tc.in)
|
||||
if got != tc.want {
|
||||
t.Errorf("Normalize(%q) = %q, want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user