From fcb83691f5835aa592189012db5a4fd5c7d15d46 Mon Sep 17 00:00:00 2001 From: Jan Novak Date: Thu, 7 May 2026 10:47:54 +0200 Subject: [PATCH] fix(go/fio): nested-table early exit + non-padded date parsing extractSecondTableRows tracked a boolean inTarget flag and exited on the first </table> token while inside the target. Any nested <table> (e.g. pagination markup in the real Fio page) would cause an early return before reading any data rows, explaining the 0-transaction report. Fixed by tracking targetDepth instead: depth increments on every <table>
inside the target and we only return when it reaches 0 again. parseCzechDate also only tried zero-padded layouts ("02.01.2006"). The real Fio transparent page emits non-padded dates ("7.5.2026"); added "2.1.2006" and "2/1/2006" as the preferred layouts. Also adds a dry-run diagnostic line ("fetched N transaction(s) from Fio") so the fetch vs dedup split is visible without reading logs. Co-Authored-By: Claude Opus 4.7 --- go/internal/io/fio/fio_test.go | 18 ++++++++++++++++++ go/internal/io/fio/transparent.go | 27 ++++++++++++++++++--------- go/internal/services/banksync/sync.go | 4 ++++ 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/go/internal/io/fio/fio_test.go b/go/internal/io/fio/fio_test.go index 03774ef..7a3e38c 100644 --- a/go/internal/io/fio/fio_test.go +++ b/go/internal/io/fio/fio_test.go @@ -95,6 +95,8 @@ func TestParseCzechDate(t *testing.T) { cases := []struct{ in, want string }{ {"10.04.2026", "2026-04-10"}, {"10/04/2026", "2026-04-10"}, + {"7.5.2026", "2026-05-07"}, // non-padded — real Fio transparent page format + {"3.12.2025", "2025-12-03"}, // non-padded single-digit day, double-digit month {"", ""}, {"invalid", ""}, } @@ -105,6 +107,22 @@ func TestParseCzechDate(t *testing.T) { } } +func TestExtractSecondTableRows_NestedTable(t *testing.T) { + // Regression: a nested
<table> inside the target must not cause early exit. + html := `
nav
+ + + + + + +
Date
7.5.2026
nested
6.5.2026
` + rows := extractSecondTableRows([]byte(html)) + if len(rows) != 2 { + t.Errorf("want 2 data rows, got %d: %v", len(rows), rows) + } +} + func TestParseCzechAmount(t *testing.T) { cases := []struct { in string diff --git a/go/internal/io/fio/transparent.go b/go/internal/io/fio/transparent.go index 9c74023..29ff7df 100644 --- a/go/internal/io/fio/transparent.go +++ b/go/internal/io/fio/transparent.go @@ -91,11 +91,13 @@ func parseTransparentHTML(body []byte) ([]Transaction, error) { // extractSecondTableRows walks the HTML token stream and returns data rows // from the second <table class="table"> element, skipping the <thead>. +// It tracks nesting depth so that nested
<table> elements inside the target +// do not trigger an early exit. func extractSecondTableRows(body []byte) [][]string { z := ghtml.NewTokenizer(strings.NewReader(string(body))) tableCount := 0 - inTarget := false + targetDepth := 0 // >0 while inside the target table (handles nesting) inThead := false inRow := false inCell := false @@ -113,18 +115,20 @@ func extractSecondTableRows(body []byte) [][]string { t := z.Token() switch t.Data { case "table": - if hasClass(t, "table") { + if targetDepth > 0 { + targetDepth++ // nested table inside target; track so </table>
doesn't exit early + } else if hasClass(t, "table") { tableCount++ if tableCount == 2 { - inTarget = true + targetDepth = 1 } } case "thead": - if inTarget { + if targetDepth > 0 { inThead = true } case "tr": - if inTarget && !inThead { + if targetDepth > 0 && !inThead { inRow = true currentRow = nil } @@ -152,8 +156,11 @@ func extractSecondTableRows(body []byte) [][]string { inRow = false } case "table": - if inTarget { - return rows + if targetDepth > 0 { + targetDepth-- + if targetDepth == 0 { + return rows + } } } case ghtml.TextToken: @@ -178,11 +185,13 @@ func hasClass(t ghtml.Token, cls string) bool { return false } -// parseCzechDate parses "DD.MM.YYYY" or "DD/MM/YYYY" → "YYYY-MM-DD". +// parseCzechDate parses Czech date strings → "YYYY-MM-DD". +// Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants +// with dot or slash separators, as the Fio transparent page omits leading zeros. // Returns "" on parse error. func parseCzechDate(s string) string { s = strings.TrimSpace(s) - for _, layout := range []string{"02.01.2006", "02/01/2006"} { + for _, layout := range []string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"} { if t, err := time.Parse(layout, s); err == nil { return t.Format("2006-01-02") } diff --git a/go/internal/services/banksync/sync.go b/go/internal/services/banksync/sync.go index a48b63e..a036c2e 100644 --- a/go/internal/services/banksync/sync.go +++ b/go/internal/services/banksync/sync.go @@ -87,6 +87,10 @@ func SyncToSheets( if err != nil { return 0, fmt.Errorf("sync: fetch fio: %w", err) } + if opts.DryRun { + fmt.Printf("Dry run: window %s to %s, fetched %d transaction(s) from Fio\n", + from.Format("2006-01-02"), to.Format("2006-01-02"), len(txns)) + } // 4. Append new rows. var newRows [][]any