All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
extractSecondTableRows tracked a boolean inTarget flag and exited on
the first </table> token while inside the target. Any nested <table>
(e.g. pagination markup in the real Fio page) would cause an early
return before reading any data rows, explaining the 0-transaction report.
Fixed by tracking targetDepth instead: depth increments on every <table>
inside the target and we only return when it reaches 0 again.
parseCzechDate also only tried zero-padded layouts ("02.01.2006").
The real Fio transparent page emits non-padded dates ("7.5.2026");
added "2.1.2006" and "2/1/2006" as the preferred layouts.
Also adds a dry-run diagnostic line ("fetched N transaction(s) from Fio")
so the fetch vs dedup split is visible without reading logs.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
227 lines
5.2 KiB
Go
227 lines
5.2 KiB
Go
package fio
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"io"
|
||
"net/http"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
"unicode"
|
||
|
||
ghtml "golang.org/x/net/html"
|
||
)
|
||
|
||
// transparentClient fetches transactions from the Fio transparent account page (HTML).
|
||
// Ports scripts/fio_utils.py FioTableParser + fetch_transactions_transparent.
|
||
type transparentClient struct {
|
||
accountNum string
|
||
hc httpDoer
|
||
}
|
||
|
||
func (c *transparentClient) FetchTransactions(ctx context.Context, from, to time.Time) ([]Transaction, error) {
|
||
// Transparent page date format: D.M.YYYY
|
||
url := fmt.Sprintf(
|
||
"https://ib.fio.cz/ib/transparent?a=%s&f=%s&t=%s",
|
||
c.accountNum,
|
||
from.Format("2.1.2006"),
|
||
to.Format("2.1.2006"),
|
||
)
|
||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
resp, err := c.hc.Do(req)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
defer resp.Body.Close()
|
||
if resp.StatusCode != http.StatusOK {
|
||
return nil, fmt.Errorf("fio transparent: HTTP %d", resp.StatusCode)
|
||
}
|
||
body, err := io.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return parseTransparentHTML(body)
|
||
}
|
||
|
||
// Zero-based positions of the cells read from each row of the
// transparent-page table. The page's header row is:
//
//	Datum | Částka | Typ | Název protiúčtu | Zpráva pro příjemce | KS | VS | SS | Poznámka
//
// Columns that are never read (Typ, Poznámka) have no named constant.
const (
	tColDate    = iota // 0: Datum
	tColAmount         // 1: Částka
	_                  // 2: Typ (not read)
	tColSender         // 3: Název protiúčtu
	tColMessage        // 4: Zpráva pro příjemce
	tColKS             // 5: KS
	tColVS             // 6: VS
	tColSS             // 7: SS
)
|
||
|
||
func parseTransparentHTML(body []byte) ([]Transaction, error) {
|
||
rows := extractSecondTableRows(body)
|
||
|
||
var txns []Transaction
|
||
for _, row := range rows {
|
||
col := func(i int) string {
|
||
if i < len(row) {
|
||
return strings.TrimSpace(row[i])
|
||
}
|
||
return ""
|
||
}
|
||
dateStr := parseCzechDate(col(tColDate))
|
||
amount := parseCzechAmount(col(tColAmount))
|
||
if dateStr == "" || amount <= 0 {
|
||
continue
|
||
}
|
||
txns = append(txns, Transaction{
|
||
Date: dateStr,
|
||
Amount: amount,
|
||
Sender: col(tColSender),
|
||
Message: col(tColMessage),
|
||
KS: col(tColKS),
|
||
VS: col(tColVS),
|
||
SS: col(tColSS),
|
||
BankID: "", // not available on HTML path
|
||
})
|
||
}
|
||
return txns, nil
|
||
}
|
||
|
||
// extractSecondTableRows walks the HTML token stream and returns data rows
|
||
// from the second <table class="table"> element, skipping the <thead>.
|
||
// It tracks nesting depth so that nested <table> elements inside the target
|
||
// do not trigger an early exit.
|
||
func extractSecondTableRows(body []byte) [][]string {
|
||
z := ghtml.NewTokenizer(strings.NewReader(string(body)))
|
||
|
||
tableCount := 0
|
||
targetDepth := 0 // >0 while inside the target table (handles nesting)
|
||
inThead := false
|
||
inRow := false
|
||
inCell := false
|
||
var currentRow []string
|
||
var cellBuf strings.Builder
|
||
var rows [][]string
|
||
|
||
for {
|
||
tt := z.Next()
|
||
if tt == ghtml.ErrorToken {
|
||
break
|
||
}
|
||
switch tt {
|
||
case ghtml.StartTagToken:
|
||
t := z.Token()
|
||
switch t.Data {
|
||
case "table":
|
||
if targetDepth > 0 {
|
||
targetDepth++ // nested table inside target; track so </table> doesn't exit early
|
||
} else if hasClass(t, "table") {
|
||
tableCount++
|
||
if tableCount == 2 {
|
||
targetDepth = 1
|
||
}
|
||
}
|
||
case "thead":
|
||
if targetDepth > 0 {
|
||
inThead = true
|
||
}
|
||
case "tr":
|
||
if targetDepth > 0 && !inThead {
|
||
inRow = true
|
||
currentRow = nil
|
||
}
|
||
case "td", "th":
|
||
if inRow {
|
||
inCell = true
|
||
cellBuf.Reset()
|
||
}
|
||
}
|
||
case ghtml.EndTagToken:
|
||
t := z.Token()
|
||
switch t.Data {
|
||
case "td", "th":
|
||
if inCell {
|
||
currentRow = append(currentRow, cellBuf.String())
|
||
inCell = false
|
||
}
|
||
case "thead":
|
||
inThead = false
|
||
case "tr":
|
||
if inRow {
|
||
if len(currentRow) > 0 {
|
||
rows = append(rows, currentRow)
|
||
}
|
||
inRow = false
|
||
}
|
||
case "table":
|
||
if targetDepth > 0 {
|
||
targetDepth--
|
||
if targetDepth == 0 {
|
||
return rows
|
||
}
|
||
}
|
||
}
|
||
case ghtml.TextToken:
|
||
if inCell {
|
||
cellBuf.WriteString(z.Token().Data)
|
||
}
|
||
}
|
||
}
|
||
return rows
|
||
}
|
||
|
||
func hasClass(t ghtml.Token, cls string) bool {
|
||
for _, a := range t.Attr {
|
||
if a.Key == "class" {
|
||
for _, c := range strings.Fields(a.Val) {
|
||
if c == cls {
|
||
return true
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// parseCzechDate converts a day-month-year date string into "YYYY-MM-DD".
//
// Surrounding whitespace is ignored. Both dot and slash separators are
// accepted, with or without leading zeros ("07.05.2026", "7.5.2026",
// "7/5/2026"); the Fio transparent page emits the non-padded form.
// An empty string is returned when s matches none of the layouts.
func parseCzechDate(s string) string {
	// Tried in order; the first layout that parses wins.
	layouts := [...]string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"}

	trimmed := strings.TrimSpace(s)
	for i := 0; i < len(layouts); i++ {
		parsed, err := time.Parse(layouts[i], trimmed)
		if err != nil {
			continue
		}
		return parsed.Format("2006-01-02")
	}
	return ""
}
|
||
|
||
// nonNumericRe matches every character that is not a digit, a dot or a comma.
var nonNumericRe = regexp.MustCompile(`[^\d.,]`)

// parseCzechAmount parses amounts such as "1 500,00 CZK", "1.500,00" or
// "1500.00" into a float64.
//
// Whitespace (including NBSP) and letters are discarded first. If a comma is
// present it is taken as the decimal separator and dots as thousands
// separators; otherwise a dot is the decimal separator. A leading minus sign
// (ASCII '-' or U+2212) makes the result negative in both forms.
//
// Returns 0 when no number can be read.
func parseCzechAmount(s string) float64 {
	// Remove NBSP, regular spaces and currency letters. unicode.IsSpace
	// covers both U+0020 and U+00A0.
	s = strings.Map(func(r rune) rune {
		if unicode.IsSpace(r) || unicode.IsLetter(r) {
			return -1
		}
		return r
	}, s)

	// Capture the sign up front: the non-numeric cleanup below would
	// otherwise silently turn "-1500.00" into +1500.
	negative := false
	for _, sign := range []string{"-", "\u2212"} {
		if strings.HasPrefix(s, sign) {
			negative = true
			s = s[len(sign):]
			break
		}
	}

	if strings.Contains(s, ",") {
		// Czech decimal: 1.500,00 → remove dots (thousand sep), comma → dot
		s = strings.ReplaceAll(s, ".", "")
		s = strings.ReplaceAll(s, ",", ".")
	} else {
		// Dot-decimal form: drop anything that is not a digit or a dot.
		s = nonNumericRe.ReplaceAllString(s, "")
	}

	var f float64
	// The scan error is deliberately ignored: on failure f keeps its zero
	// value, which is this function's documented error result.
	_, _ = fmt.Sscanf(s, "%f", &f)

	// Only flip a positive magnitude, so malformed input that still carries
	// its own sign (e.g. "--5,00") can never come out positive.
	if negative && f > 0 {
		f = -f
	}
	return f
}
|