fix(go/fio): nested-table early exit + non-padded date parsing
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
extractSecondTableRows tracked a boolean inTarget flag and exited on
the first </table> token while inside the target. Any nested <table>
(e.g. pagination markup in the real Fio page) would cause an early
return before reading any data rows, explaining the 0-transaction report.
Fixed by tracking targetDepth instead: depth increments on every <table>
inside the target, decrements on every </table>, and we only return when
it reaches 0 again.
parseCzechDate also only tried zero-padded layouts ("02.01.2006").
The real Fio transparent page emits non-padded dates ("7.5.2026"), which
Go's two-digit "02"/"01" layout elements reject; added "2.1.2006" and
"2/1/2006", each tried ahead of its zero-padded counterpart.
Also adds a dry-run diagnostic line ("fetched N transaction(s) from Fio")
so the fetch vs dedup split is visible without reading logs.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -91,11 +91,13 @@ func parseTransparentHTML(body []byte) ([]Transaction, error) {
|
||||
|
||||
// extractSecondTableRows walks the HTML token stream and returns data rows
|
||||
// from the second <table class="table"> element, skipping the <thead>.
|
||||
// It tracks nesting depth so that nested <table> elements inside the target
|
||||
// do not trigger an early exit.
|
||||
func extractSecondTableRows(body []byte) [][]string {
|
||||
z := ghtml.NewTokenizer(strings.NewReader(string(body)))
|
||||
|
||||
tableCount := 0
|
||||
inTarget := false
|
||||
targetDepth := 0 // >0 while inside the target table (handles nesting)
|
||||
inThead := false
|
||||
inRow := false
|
||||
inCell := false
|
||||
@@ -113,18 +115,20 @@ func extractSecondTableRows(body []byte) [][]string {
|
||||
t := z.Token()
|
||||
switch t.Data {
|
||||
case "table":
|
||||
if hasClass(t, "table") {
|
||||
if targetDepth > 0 {
|
||||
targetDepth++ // nested table inside target; track so </table> doesn't exit early
|
||||
} else if hasClass(t, "table") {
|
||||
tableCount++
|
||||
if tableCount == 2 {
|
||||
inTarget = true
|
||||
targetDepth = 1
|
||||
}
|
||||
}
|
||||
case "thead":
|
||||
if inTarget {
|
||||
if targetDepth > 0 {
|
||||
inThead = true
|
||||
}
|
||||
case "tr":
|
||||
if inTarget && !inThead {
|
||||
if targetDepth > 0 && !inThead {
|
||||
inRow = true
|
||||
currentRow = nil
|
||||
}
|
||||
@@ -152,8 +156,11 @@ func extractSecondTableRows(body []byte) [][]string {
|
||||
inRow = false
|
||||
}
|
||||
case "table":
|
||||
if inTarget {
|
||||
return rows
|
||||
if targetDepth > 0 {
|
||||
targetDepth--
|
||||
if targetDepth == 0 {
|
||||
return rows
|
||||
}
|
||||
}
|
||||
}
|
||||
case ghtml.TextToken:
|
||||
@@ -178,11 +185,13 @@ func hasClass(t ghtml.Token, cls string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// parseCzechDate parses "DD.MM.YYYY" or "DD/MM/YYYY" → "YYYY-MM-DD".
|
||||
// parseCzechDate parses Czech date strings → "YYYY-MM-DD".
|
||||
// Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants
|
||||
// with dot or slash separators, as the Fio transparent page omits leading zeros.
|
||||
// Returns "" on parse error.
|
||||
func parseCzechDate(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
for _, layout := range []string{"02.01.2006", "02/01/2006"} {
|
||||
for _, layout := range []string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"} {
|
||||
if t, err := time.Parse(layout, s); err == nil {
|
||||
return t.Format("2006-01-02")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user