All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
Adds internal/domain/czech.Normalize, the first pure-domain function in the Go rewrite (M2 milestone). Matches Python czech_utils.normalize byte-for-byte: NFKD decompose via golang.org/x/text/unicode/norm, drop Mn-category combining marks (unicode.Mn, not IsMark, to match Python's unicodedata.combining() semantics), then strings.ToLower. Includes 13-case table-driven test; all inputs spot-checked against the Python implementation before locking. Adds golang.org/x/text v0.36.0 as the first external dependency. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
32 lines
812 B
Go
32 lines
812 B
Go
package czech
|
|
|
|
import "testing"
|
|
|
|
func TestNormalize(t *testing.T) {
|
|
cases := []struct {
|
|
in string
|
|
want string
|
|
}{
|
|
{"Honza", "honza"},
|
|
{"žluťoučký", "zlutoucky"},
|
|
{"Příliš", "prilis"},
|
|
{"Dvořák", "dvorak"},
|
|
{"Růžena", "ruzena"},
|
|
{"Čeněk", "cenek"},
|
|
{"Kačer", "kacer"},
|
|
{"", ""},
|
|
{"prilis", "prilis"}, // idempotent
|
|
{"Jan Novák", "jan novak"}, // whitespace preserved
|
|
{"é", "e"}, // precomposed é (NFC)
|
|
{"é", "e"}, // decomposed e + combining acute
|
|
{"Ondřej Procházka", "ondrej prochazka"}, // realistic full name
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
got := Normalize(tc.in)
|
|
if got != tc.want {
|
|
t.Errorf("Normalize(%q) = %q, want %q", tc.in, got, tc.want)
|
|
}
|
|
}
|
|
}
|