fix(go/fio): nested-table early exit + non-padded date parsing
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s

extractSecondTableRows tracked a boolean inTarget flag and exited on
the first </table> token while inside the target. Any nested <table>
(e.g. pagination markup in the real Fio page) would cause an early
return before reading any data rows, explaining the 0-transaction report.
Fixed by tracking targetDepth instead: depth increments on every <table>
inside the target and we only return when it reaches 0 again.

In addition, parseCzechDate previously tried only zero-padded layouts
("02.01.2006"). The real Fio transparent page emits non-padded dates
("7.5.2026"), so the non-padded layouts "2.1.2006" and "2/1/2006" were
added, each tried before its zero-padded counterpart.

Also adds a dry-run diagnostic line ("fetched N transaction(s) from Fio")
so the fetch vs dedup split is visible without reading logs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-07 10:47:54 +02:00
parent 8275db1a63
commit fcb83691f5
3 changed files with 40 additions and 9 deletions

View File

@@ -95,6 +95,8 @@ func TestParseCzechDate(t *testing.T) {
cases := []struct{ in, want string }{ cases := []struct{ in, want string }{
{"10.04.2026", "2026-04-10"}, {"10.04.2026", "2026-04-10"},
{"10/04/2026", "2026-04-10"}, {"10/04/2026", "2026-04-10"},
{"7.5.2026", "2026-05-07"}, // non-padded — real Fio transparent page format
{"3.12.2025", "2025-12-03"}, // non-padded single-digit day, double-digit month
{"", ""}, {"", ""},
{"invalid", ""}, {"invalid", ""},
} }
@@ -105,6 +107,22 @@ func TestParseCzechDate(t *testing.T) {
} }
} }
// TestExtractSecondTableRows_NestedTable is a regression test for the
// nested-table case: a <table> embedded in a cell of the target table must
// not make extractSecondTableRows stop before it has collected both body rows.
func TestExtractSecondTableRows_NestedTable(t *testing.T) {
	// Fixture layout: a first table.table, followed by the second table.table
	// with a <thead> and two <tbody> rows; the first body row carries a
	// nested <table> inside its second cell.
	const fixture = `<table class="table"><tr><td>nav</td></tr></table>
<table class="table">
<thead><tr><th>Date</th></tr></thead>
<tbody>
<tr><td>7.5.2026</td><td><table><tr><td>nested</td></tr></table></td></tr>
<tr><td>6.5.2026</td><td></td></tr>
</tbody>
</table>`

	got := extractSecondTableRows([]byte(fixture))
	if n := len(got); n != 2 {
		t.Errorf("want 2 data rows, got %d: %v", n, got)
	}
}
func TestParseCzechAmount(t *testing.T) { func TestParseCzechAmount(t *testing.T) {
cases := []struct { cases := []struct {
in string in string

View File

@@ -91,11 +91,13 @@ func parseTransparentHTML(body []byte) ([]Transaction, error) {
// extractSecondTableRows walks the HTML token stream and returns data rows // extractSecondTableRows walks the HTML token stream and returns data rows
// from the second <table class="table"> element, skipping the <thead>. // from the second <table class="table"> element, skipping the <thead>.
// It tracks nesting depth so that nested <table> elements inside the target
// do not trigger an early exit.
func extractSecondTableRows(body []byte) [][]string { func extractSecondTableRows(body []byte) [][]string {
z := ghtml.NewTokenizer(strings.NewReader(string(body))) z := ghtml.NewTokenizer(strings.NewReader(string(body)))
tableCount := 0 tableCount := 0
inTarget := false targetDepth := 0 // >0 while inside the target table (handles nesting)
inThead := false inThead := false
inRow := false inRow := false
inCell := false inCell := false
@@ -113,18 +115,20 @@ func extractSecondTableRows(body []byte) [][]string {
t := z.Token() t := z.Token()
switch t.Data { switch t.Data {
case "table": case "table":
if hasClass(t, "table") { if targetDepth > 0 {
targetDepth++ // nested table inside target; track so </table> doesn't exit early
} else if hasClass(t, "table") {
tableCount++ tableCount++
if tableCount == 2 { if tableCount == 2 {
inTarget = true targetDepth = 1
} }
} }
case "thead": case "thead":
if inTarget { if targetDepth > 0 {
inThead = true inThead = true
} }
case "tr": case "tr":
if inTarget && !inThead { if targetDepth > 0 && !inThead {
inRow = true inRow = true
currentRow = nil currentRow = nil
} }
@@ -152,10 +156,13 @@ func extractSecondTableRows(body []byte) [][]string {
inRow = false inRow = false
} }
case "table": case "table":
if inTarget { if targetDepth > 0 {
targetDepth--
if targetDepth == 0 {
return rows return rows
} }
} }
}
case ghtml.TextToken: case ghtml.TextToken:
if inCell { if inCell {
cellBuf.WriteString(z.Token().Data) cellBuf.WriteString(z.Token().Data)
@@ -178,11 +185,13 @@ func hasClass(t ghtml.Token, cls string) bool {
return false return false
} }
// parseCzechDate parses "DD.MM.YYYY" or "DD/MM/YYYY" → "YYYY-MM-DD". // parseCzechDate parses Czech date strings → "YYYY-MM-DD".
// Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants
// with dot or slash separators, as the Fio transparent page omits leading zeros.
// Returns "" on parse error. // Returns "" on parse error.
func parseCzechDate(s string) string { func parseCzechDate(s string) string {
s = strings.TrimSpace(s) s = strings.TrimSpace(s)
for _, layout := range []string{"02.01.2006", "02/01/2006"} { for _, layout := range []string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"} {
if t, err := time.Parse(layout, s); err == nil { if t, err := time.Parse(layout, s); err == nil {
return t.Format("2006-01-02") return t.Format("2006-01-02")
} }

View File

@@ -87,6 +87,10 @@ func SyncToSheets(
if err != nil { if err != nil {
return 0, fmt.Errorf("sync: fetch fio: %w", err) return 0, fmt.Errorf("sync: fetch fio: %w", err)
} }
if opts.DryRun {
fmt.Printf("Dry run: window %s to %s, fetched %d transaction(s) from Fio\n",
from.Format("2006-01-02"), to.Format("2006-01-02"), len(txns))
}
// 4. Append new rows. // 4. Append new rows.
var newRows [][]any var newRows [][]any