Files
fuj-management/go/internal/io/fio/transparent.go
Jan Novak fcb83691f5
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
fix(go/fio): nested-table early exit + non-padded date parsing
extractSecondTableRows tracked a boolean inTarget flag and exited on
the first </table> token while inside the target. Any nested <table>
(e.g. pagination markup in the real Fio page) would cause an early
return before reading any data rows, explaining the 0-transaction report.
Fixed by tracking targetDepth instead: depth increments on every <table>
inside the target and we only return when it reaches 0 again.

parseCzechDate also only tried zero-padded layouts ("02.01.2006").
The real Fio transparent page emits non-padded dates ("7.5.2026");
added "2.1.2006" and "2/1/2006" as the preferred layouts.

Also adds a dry-run diagnostic line ("fetched N transaction(s) from Fio")
so the fetch vs dedup split is visible without reading logs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-07 10:47:54 +02:00

227 lines
5.2 KiB
Go
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package fio
import (
"context"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"time"
"unicode"
ghtml "golang.org/x/net/html"
)
// transparentClient fetches transactions from the Fio transparent account page (HTML).
// Ports scripts/fio_utils.py FioTableParser + fetch_transactions_transparent.
type transparentClient struct {
	// accountNum is interpolated verbatim into the page URL as the "a" query parameter.
	accountNum string
	// hc executes the HTTP GET request built by FetchTransactions.
	hc httpDoer
}
// FetchTransactions downloads the Fio transparent-account page for the
// from..to date range and returns the transactions parsed from its HTML.
//
// ctx is attached to the outgoing request. from and to are rendered in the
// page's D.M.YYYY format (no leading zeros). Any response status other than
// 200 OK is reported as an error. Request, transport and body-read failures
// are wrapped with %w so callers can still match the cause with errors.Is or
// errors.As.
func (c *transparentClient) FetchTransactions(ctx context.Context, from, to time.Time) ([]Transaction, error) {
	// Transparent page date format: D.M.YYYY
	const dateLayout = "2.1.2006"

	endpoint := fmt.Sprintf(
		"https://ib.fio.cz/ib/transparent?a=%s&f=%s&t=%s",
		c.accountNum,
		from.Format(dateLayout),
		to.Format(dateLayout),
	)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: building request: %w", err)
	}

	resp, err := c.hc.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: fetching page: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fio transparent: HTTP %d", resp.StatusCode)
	}

	// The tokenizer-based parser works on a byte slice, so the whole page is
	// read into memory first.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: reading response body: %w", err)
	}
	return parseTransparentHTML(body)
}
// Zero-based positions of the cells read from each row of the
// transparent-page table. The page's header row is:
//
//	Datum | Částka | Typ | Název protiúčtu | Zpráva pro příjemce | KS | VS | SS | Poznámka
//
// i.e. date, amount, type, counterparty name, message for recipient,
// constant symbol, variable symbol, specific symbol, note. The "Typ" column
// is skipped with a blank identifier and the trailing "Poznámka" column has
// no constant because neither is read.
const (
	tColDate    = iota // 0: date
	tColAmount         // 1: amount
	_                  // 2: type (unused)
	tColSender         // 3: counterparty name
	tColMessage        // 4: message for recipient
	tColKS             // 5: constant symbol
	tColVS             // 6: variable symbol
	tColSS             // 7: specific symbol
)
// parseTransparentHTML converts the transparent-account page HTML into
// transactions. Rows whose date cell cannot be parsed, or whose amount is
// zero or negative, are skipped. The returned error is always nil.
func parseTransparentHTML(body []byte) ([]Transaction, error) {
	var result []Transaction

	for _, cells := range extractSecondTableRows(body) {
		// field returns the trimmed text of cell idx, or "" when the row is
		// shorter than expected.
		field := func(idx int) string {
			if idx >= len(cells) {
				return ""
			}
			return strings.TrimSpace(cells[idx])
		}

		date := parseCzechDate(field(tColDate))
		value := parseCzechAmount(field(tColAmount))
		if date == "" || value <= 0 {
			continue
		}

		tx := Transaction{
			Date:    date,
			Amount:  value,
			Sender:  field(tColSender),
			Message: field(tColMessage),
			KS:      field(tColKS),
			VS:      field(tColVS),
			SS:      field(tColSS),
			BankID:  "", // not available on HTML path
		}
		result = append(result, tx)
	}

	return result, nil
}
// extractSecondTableRows walks the HTML token stream and returns the data
// rows of the second <table class="table"> element, skipping its <thead>.
//
// Each returned row is the list of raw (untrimmed) text contents of its
// <td>/<th> cells; rows without any cell are dropped. Scanning stops at the
// target table's closing tag, or at the end of input if the table is never
// closed.
//
// Nesting depth is tracked so that a <table> nested inside the target does
// not end the scan early. Structural tags (<thead>, <tr>, <td>, <th>) are
// honoured only when they belong directly to the target table: a nested
// table can neither reset the row being collected nor add rows of its own.
// Text inside a nested table is still appended to the enclosing target cell,
// if there is one.
func extractSecondTableRows(body []byte) [][]string {
	z := ghtml.NewTokenizer(strings.NewReader(string(body)))

	var (
		tableCount  int // class="table" start tags seen while outside the target
		targetDepth int // 0 outside the target, 1 directly inside it, >1 inside a nested table
		inThead     bool
		inRow       bool
		inCell      bool
		currentRow  []string
		cellBuf     strings.Builder
		rows        [][]string
	)

	for {
		switch z.Next() {
		case ghtml.ErrorToken:
			// End of input (or a tokenizer error): return what was collected.
			return rows

		case ghtml.StartTagToken:
			t := z.Token()
			if t.Data == "table" {
				switch {
				case targetDepth > 0:
					targetDepth++ // nested table inside target; its </table> must not end the scan
				case hasClass(t, "table"):
					tableCount++
					if tableCount == 2 {
						targetDepth = 1
					}
				}
				continue
			}
			if targetDepth != 1 {
				continue // outside the target, or inside a nested table
			}
			switch t.Data {
			case "thead":
				inThead = true
			case "tr":
				if !inThead {
					inRow = true
					currentRow = nil
				}
			case "td", "th":
				if inRow {
					inCell = true
					cellBuf.Reset()
				}
			}

		case ghtml.EndTagToken:
			t := z.Token()
			if t.Data == "table" {
				if targetDepth > 0 {
					targetDepth--
					if targetDepth == 0 {
						return rows
					}
				}
				continue
			}
			if targetDepth != 1 {
				continue // outside the target, or inside a nested table
			}
			switch t.Data {
			case "td", "th":
				if inCell {
					currentRow = append(currentRow, cellBuf.String())
					inCell = false
				}
			case "thead":
				inThead = false
			case "tr":
				if inRow {
					if len(currentRow) > 0 {
						rows = append(rows, currentRow)
					}
					inRow = false
				}
			}

		case ghtml.TextToken:
			if inCell {
				cellBuf.WriteString(z.Token().Data)
			}
		}
	}
}
// hasClass reports whether any "class" attribute of t lists cls as one of
// its whitespace-separated class names. The comparison is case-sensitive.
func hasClass(t ghtml.Token, cls string) bool {
	for i := range t.Attr {
		attr := t.Attr[i]
		if attr.Key != "class" {
			continue
		}
		for _, name := range strings.Fields(attr.Val) {
			if name == cls {
				return true
			}
		}
	}
	return false
}
// parseCzechDate converts a day-month-year date string into "YYYY-MM-DD".
// Surrounding whitespace is ignored. Both non-padded ("7.5.2026") and
// zero-padded ("07.05.2026") forms are accepted, separated by either dots or
// slashes. An input that matches none of the layouts yields "".
func parseCzechDate(s string) string {
	trimmed := strings.TrimSpace(s)
	layouts := [...]string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"}

	for i := range layouts {
		parsed, err := time.Parse(layouts[i], trimmed)
		if err != nil {
			continue
		}
		return parsed.Format("2006-01-02")
	}
	return ""
}
// nonNumericRe matches every character that is not a digit, dot or comma.
var nonNumericRe = regexp.MustCompile(`[^\d.,]`)

// parseCzechAmount parses an amount such as "1 500,00 CZK", "1.500,00" or
// "1500.00" into a float64.
//
// Whitespace (including no-break spaces) and letters are removed first. If
// the remainder contains a comma, the comma is the decimal separator and
// dots are thousands separators; otherwise a dot is the decimal separator.
// A leading minus sign (ASCII "-" or U+2212) is preserved in both forms, so
// negative amounts stay negative. Returns 0 when no number can be read.
func parseCzechAmount(s string) float64 {
	s = strings.Map(func(r rune) rune {
		switch {
		case unicode.IsSpace(r), unicode.IsLetter(r):
			// unicode.IsSpace covers U+00A0 (NBSP) as well as the ASCII space,
			// so no separate literal check is needed.
			return -1
		case r == '\u2212':
			// Normalise the Unicode minus sign to the ASCII one Sscanf understands.
			return '-'
		}
		return r
	}, s)

	if strings.Contains(s, ",") {
		// Czech decimal: 1.500,00 → remove dots (thousand sep), comma → dot
		s = strings.ReplaceAll(s, ".", "")
		s = strings.ReplaceAll(s, ",", ".")
	} else {
		// nonNumericRe would also strip the sign, turning a debit into a
		// credit; remember it and put it back after the clean-up.
		negative := strings.HasPrefix(s, "-")
		s = nonNumericRe.ReplaceAllString(s, "")
		if negative {
			s = "-" + s
		}
	}

	var f float64
	if _, err := fmt.Sscanf(s, "%f", &f); err != nil {
		return 0
	}
	return f
}