fix(go/fio): nested-table early exit + non-padded date parsing
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
extractSecondTableRows tracked a boolean inTarget flag and exited on
the first </table> token while inside the target. Any nested <table>
(e.g. pagination markup in the real Fio page) would cause an early
return before reading any data rows, explaining the 0-transaction report.
Fixed by tracking targetDepth instead: depth increments on every <table>
inside the target, decrements on every </table>, and we only return when
it reaches 0 again (i.e. when the target table itself is closed).
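parseCzechDate previously tried only zero-padded layouts ("02.01.2006"),
and time.Parse requires exactly two digits for the "02" and "01" fields.
The real Fio transparent page emits non-padded dates ("7.5.2026"), which
therefore failed to parse; added "2.1.2006" and "2/1/2006" as the
preferred layouts.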
Also adds a dry-run diagnostic line ("fetched N transaction(s) from Fio")
so the fetch vs dedup split is visible without reading logs.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -95,6 +95,8 @@ func TestParseCzechDate(t *testing.T) {
|
|||||||
cases := []struct{ in, want string }{
|
cases := []struct{ in, want string }{
|
||||||
{"10.04.2026", "2026-04-10"},
|
{"10.04.2026", "2026-04-10"},
|
||||||
{"10/04/2026", "2026-04-10"},
|
{"10/04/2026", "2026-04-10"},
|
||||||
|
{"7.5.2026", "2026-05-07"}, // non-padded — real Fio transparent page format
|
||||||
|
{"3.12.2025", "2025-12-03"}, // non-padded single-digit day, double-digit month
|
||||||
{"", ""},
|
{"", ""},
|
||||||
{"invalid", ""},
|
{"invalid", ""},
|
||||||
}
|
}
|
||||||
@@ -105,6 +107,22 @@ func TestParseCzechDate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractSecondTableRows_NestedTable(t *testing.T) {
|
||||||
|
// Regression: a nested <table> inside the target must not cause early exit.
|
||||||
|
html := `<table class="table"><tr><td>nav</td></tr></table>
|
||||||
|
<table class="table">
|
||||||
|
<thead><tr><th>Date</th></tr></thead>
|
||||||
|
<tbody>
|
||||||
|
<tr><td>7.5.2026</td><td><table><tr><td>nested</td></tr></table></td></tr>
|
||||||
|
<tr><td>6.5.2026</td><td></td></tr>
|
||||||
|
</tbody>
|
||||||
|
</table>`
|
||||||
|
rows := extractSecondTableRows([]byte(html))
|
||||||
|
if len(rows) != 2 {
|
||||||
|
t.Errorf("want 2 data rows, got %d: %v", len(rows), rows)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseCzechAmount(t *testing.T) {
|
func TestParseCzechAmount(t *testing.T) {
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
in string
|
in string
|
||||||
|
|||||||
@@ -91,11 +91,13 @@ func parseTransparentHTML(body []byte) ([]Transaction, error) {
|
|||||||
|
|
||||||
// extractSecondTableRows walks the HTML token stream and returns data rows
|
// extractSecondTableRows walks the HTML token stream and returns data rows
|
||||||
// from the second <table class="table"> element, skipping the <thead>.
|
// from the second <table class="table"> element, skipping the <thead>.
|
||||||
|
// It tracks nesting depth so that nested <table> elements inside the target
|
||||||
|
// do not trigger an early exit.
|
||||||
func extractSecondTableRows(body []byte) [][]string {
|
func extractSecondTableRows(body []byte) [][]string {
|
||||||
z := ghtml.NewTokenizer(strings.NewReader(string(body)))
|
z := ghtml.NewTokenizer(strings.NewReader(string(body)))
|
||||||
|
|
||||||
tableCount := 0
|
tableCount := 0
|
||||||
inTarget := false
|
targetDepth := 0 // >0 while inside the target table (handles nesting)
|
||||||
inThead := false
|
inThead := false
|
||||||
inRow := false
|
inRow := false
|
||||||
inCell := false
|
inCell := false
|
||||||
@@ -113,18 +115,20 @@ func extractSecondTableRows(body []byte) [][]string {
|
|||||||
t := z.Token()
|
t := z.Token()
|
||||||
switch t.Data {
|
switch t.Data {
|
||||||
case "table":
|
case "table":
|
||||||
if hasClass(t, "table") {
|
if targetDepth > 0 {
|
||||||
|
targetDepth++ // nested table inside target; track so </table> doesn't exit early
|
||||||
|
} else if hasClass(t, "table") {
|
||||||
tableCount++
|
tableCount++
|
||||||
if tableCount == 2 {
|
if tableCount == 2 {
|
||||||
inTarget = true
|
targetDepth = 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case "thead":
|
case "thead":
|
||||||
if inTarget {
|
if targetDepth > 0 {
|
||||||
inThead = true
|
inThead = true
|
||||||
}
|
}
|
||||||
case "tr":
|
case "tr":
|
||||||
if inTarget && !inThead {
|
if targetDepth > 0 && !inThead {
|
||||||
inRow = true
|
inRow = true
|
||||||
currentRow = nil
|
currentRow = nil
|
||||||
}
|
}
|
||||||
@@ -152,10 +156,13 @@ func extractSecondTableRows(body []byte) [][]string {
|
|||||||
inRow = false
|
inRow = false
|
||||||
}
|
}
|
||||||
case "table":
|
case "table":
|
||||||
if inTarget {
|
if targetDepth > 0 {
|
||||||
|
targetDepth--
|
||||||
|
if targetDepth == 0 {
|
||||||
return rows
|
return rows
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
case ghtml.TextToken:
|
case ghtml.TextToken:
|
||||||
if inCell {
|
if inCell {
|
||||||
cellBuf.WriteString(z.Token().Data)
|
cellBuf.WriteString(z.Token().Data)
|
||||||
@@ -178,11 +185,13 @@ func hasClass(t ghtml.Token, cls string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseCzechDate parses "DD.MM.YYYY" or "DD/MM/YYYY" → "YYYY-MM-DD".
|
// parseCzechDate parses Czech date strings → "YYYY-MM-DD".
|
||||||
|
// Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants
|
||||||
|
// with dot or slash separators, as the Fio transparent page omits leading zeros.
|
||||||
// Returns "" on parse error.
|
// Returns "" on parse error.
|
||||||
func parseCzechDate(s string) string {
|
func parseCzechDate(s string) string {
|
||||||
s = strings.TrimSpace(s)
|
s = strings.TrimSpace(s)
|
||||||
for _, layout := range []string{"02.01.2006", "02/01/2006"} {
|
for _, layout := range []string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"} {
|
||||||
if t, err := time.Parse(layout, s); err == nil {
|
if t, err := time.Parse(layout, s); err == nil {
|
||||||
return t.Format("2006-01-02")
|
return t.Format("2006-01-02")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -87,6 +87,10 @@ func SyncToSheets(
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("sync: fetch fio: %w", err)
|
return 0, fmt.Errorf("sync: fetch fio: %w", err)
|
||||||
}
|
}
|
||||||
|
if opts.DryRun {
|
||||||
|
fmt.Printf("Dry run: window %s to %s, fetched %d transaction(s) from Fio\n",
|
||||||
|
from.Format("2006-01-02"), to.Format("2006-01-02"), len(txns))
|
||||||
|
}
|
||||||
|
|
||||||
// 4. Append new rows.
|
// 4. Append new rows.
|
||||||
var newRows [][]any
|
var newRows [][]any
|
||||||
|
|||||||
Reference in New Issue
Block a user