fix(go/fio): nested-table early exit + non-padded date parsing
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
extractSecondTableRows tracked a boolean inTarget flag and returned on
the first </table> token seen while inside the target. A nested <table>
(e.g. pagination markup in the real Fio page) therefore ended the scan at
the nested table's own </table>, returning only the rows completed so far;
when the nested table precedes the first completed data row that is zero
rows, explaining the 0-transaction report.
Fixed by tracking targetDepth instead: it is set to 1 on entering the
target, incremented on every <table> inside the target, decremented on
every </table>, and we only return when it reaches 0 again.
parseCzechDate also only tried zero-padded layouts ("02.01.2006" and
"02/01/2006"). The real Fio transparent page emits non-padded dates
("7.5.2026"); added "2.1.2006" and "2/1/2006", each tried before its
zero-padded counterpart.
Also adds a dry-run diagnostic line ("fetched N transaction(s) from Fio")
so the fetch vs dedup split is visible without reading logs.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -95,6 +95,8 @@ func TestParseCzechDate(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
{"10.04.2026", "2026-04-10"},
|
||||
{"10/04/2026", "2026-04-10"},
|
||||
{"7.5.2026", "2026-05-07"}, // non-padded — real Fio transparent page format
|
||||
{"3.12.2025", "2025-12-03"}, // non-padded single-digit day, double-digit month
|
||||
{"", ""},
|
||||
{"invalid", ""},
|
||||
}
|
||||
@@ -105,6 +107,22 @@ func TestParseCzechDate(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractSecondTableRows_NestedTable feeds extractSecondTableRows a
// document with two class="table" tables, where the first data row of the
// second table carries a nested <table> in one of its cells, and expects
// exactly two rows back (the <thead> row is not counted).
//
// NOTE(review): only len(rows) is asserted, not the cell contents, so this
// test cannot tell whether the nested <tr>/<td> leaked into or replaced the
// outer row — consider also checking rows[0][0] and rows[1][0].
func TestExtractSecondTableRows_NestedTable(t *testing.T) {
|
||||
// Regression: a nested <table> inside the target must not cause early exit.
|
||||
html := `<table class="table"><tr><td>nav</td></tr></table>
|
||||
<table class="table">
|
||||
<thead><tr><th>Date</th></tr></thead>
|
||||
<tbody>
|
||||
<tr><td>7.5.2026</td><td><table><tr><td>nested</td></tr></table></td></tr>
|
||||
<tr><td>6.5.2026</td><td></td></tr>
|
||||
</tbody>
|
||||
</table>`
|
||||
rows := extractSecondTableRows([]byte(html))
|
||||
if len(rows) != 2 {
|
||||
t.Errorf("want 2 data rows, got %d: %v", len(rows), rows)
|
||||
|
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCzechAmount(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
|
||||
@@ -91,11 +91,13 @@ func parseTransparentHTML(body []byte) ([]Transaction, error) {
|
||||
|
||||
// extractSecondTableRows walks the HTML token stream and returns data rows
|
||||
// from the second <table class="table"> element, skipping the <thead>.
|
||||
// It tracks nesting depth so that nested <table> elements inside the target
|
||||
// do not trigger an early exit.
|
||||
func extractSecondTableRows(body []byte) [][]string {
|
||||
z := ghtml.NewTokenizer(strings.NewReader(string(body)))
|
||||
|
||||
tableCount := 0
|
||||
inTarget := false
|
||||
targetDepth := 0 // >0 while inside the target table (handles nesting)
|
||||
inThead := false
|
||||
inRow := false
|
||||
inCell := false
|
||||
@@ -113,18 +115,20 @@ func extractSecondTableRows(body []byte) [][]string {
|
||||
t := z.Token()
|
||||
switch t.Data {
|
||||
case "table":
|
||||
if hasClass(t, "table") {
|
||||
if targetDepth > 0 {
|
||||
targetDepth++ // nested table inside target; track so </table> doesn't exit early
|
||||
} else if hasClass(t, "table") {
|
||||
tableCount++
|
||||
if tableCount == 2 {
|
||||
inTarget = true
|
||||
targetDepth = 1
|
||||
}
|
||||
}
|
||||
case "thead":
|
||||
if inTarget {
|
||||
if targetDepth > 0 {
|
||||
inThead = true
|
||||
}
|
||||
case "tr":
|
||||
if inTarget && !inThead {
|
||||
if targetDepth > 0 && !inThead {
|
||||
inRow = true
|
||||
currentRow = nil
|
||||
}
|
||||
@@ -152,8 +156,11 @@ func extractSecondTableRows(body []byte) [][]string {
|
||||
inRow = false
|
||||
}
|
||||
case "table":
|
||||
if inTarget {
|
||||
return rows
|
||||
if targetDepth > 0 {
|
||||
targetDepth--
|
||||
if targetDepth == 0 {
|
||||
return rows
|
||||
}
|
||||
}
|
||||
}
|
||||
case ghtml.TextToken:
|
||||
@@ -178,11 +185,13 @@ func hasClass(t ghtml.Token, cls string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// parseCzechDate parses "DD.MM.YYYY" or "DD/MM/YYYY" → "YYYY-MM-DD".
|
||||
// parseCzechDate parses Czech date strings → "YYYY-MM-DD".
|
||||
// Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants
|
||||
// with dot or slash separators, as the Fio transparent page omits leading zeros.
|
||||
// Returns "" on parse error.
|
||||
func parseCzechDate(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
for _, layout := range []string{"02.01.2006", "02/01/2006"} {
|
||||
for _, layout := range []string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"} {
|
||||
if t, err := time.Parse(layout, s); err == nil {
|
||||
return t.Format("2006-01-02")
|
||||
}
|
||||
|
||||
@@ -87,6 +87,10 @@ func SyncToSheets(
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("sync: fetch fio: %w", err)
|
||||
}
|
||||
if opts.DryRun {
|
||||
fmt.Printf("Dry run: window %s to %s, fetched %d transaction(s) from Fio\n",
|
||||
from.Format("2006-01-02"), to.Format("2006-01-02"), len(txns))
|
||||
}
|
||||
|
||||
// 4. Append new rows.
|
||||
var newRows [][]any
|
||||
|
||||
Reference in New Issue
Block a user