Files
fuj-management/go/internal/io/fio/transparent.go
Jan Novak b41b8ef29c
All checks were successful
Deploy to K8s / deploy (push) Successful in 9s
fix(go/fio): accept 2-digit year format in transparent date parser
Fio's transparent account page now serves dates as DD.MM.YY (e.g.
07.05.26) rather than the previously expected 4-digit-year format.
Extends parseCzechDate to try all eight layout variants: padded and
non-padded, dot and slash separators, 4-digit and 2-digit years.

Go maps 2-digit year 00-68 → 2000-2068, so 26 → 2026.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-07 14:12:34 +02:00

248 lines
5.8 KiB
Go
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package fio
import (
"context"
"fmt"
"io"
"log/slog"
"net/http"
"regexp"
"strings"
"time"
"unicode"
ghtml "golang.org/x/net/html"
)
// transparentClient fetches transactions from the Fio transparent account page (HTML).
// Ports scripts/fio_utils.py FioTableParser + fetch_transactions_transparent.
type transparentClient struct {
accountNum string
hc httpDoer
}
func (c *transparentClient) FetchTransactions(ctx context.Context, from, to time.Time) ([]Transaction, error) {
// Transparent page date format: D.M.YYYY
url := fmt.Sprintf(
"https://ib.fio.cz/ib/transparent?a=%s&f=%s&t=%s",
c.accountNum,
from.Format("2.1.2006"),
to.Format("2.1.2006"),
)
slog.Debug("fio transparent: GET",
"url", url,
"from", from.Format("2006-01-02"), "to", to.Format("2006-01-02"))
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, err
}
resp, err := c.hc.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
slog.Debug("fio transparent: response", "status", resp.StatusCode)
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("fio transparent: HTTP %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
slog.Debug("fio transparent: body read", "body_bytes", len(body))
return parseTransparentHTML(body)
}
// Column indices in the transparent-page table (0-based).
// Datum | Částka | Typ | Název protiúčtu | Zpráva pro příjemce | KS | VS | SS | Poznámka
const (
tColDate = 0
tColAmount = 1
tColSender = 3
tColMessage = 4
tColKS = 5
tColVS = 6
tColSS = 7
)
func parseTransparentHTML(body []byte) ([]Transaction, error) {
rows := extractSecondTableRows(body)
var txns []Transaction
var droppedBadDate, droppedNonpositive int
for _, row := range rows {
col := func(i int) string {
if i < len(row) {
return strings.TrimSpace(row[i])
}
return ""
}
dateStr := parseCzechDate(col(tColDate))
amount := parseCzechAmount(col(tColAmount))
if dateStr == "" {
droppedBadDate++
continue
}
if amount <= 0 {
droppedNonpositive++
continue
}
txns = append(txns, Transaction{
Date: dateStr,
Amount: amount,
Sender: col(tColSender),
Message: col(tColMessage),
KS: col(tColKS),
VS: col(tColVS),
SS: col(tColSS),
BankID: "", // not available on HTML path
})
}
slog.Debug("fio transparent: parsed",
"raw_rows", len(rows),
"kept", len(txns),
"dropped_bad_date", droppedBadDate,
"dropped_nonpositive_amount", droppedNonpositive)
return txns, nil
}
// extractSecondTableRows walks the HTML token stream and returns data rows
// from the second <table class="table"> element, skipping the <thead>.
// It tracks nesting depth so that nested <table> elements inside the target
// do not trigger an early exit.
func extractSecondTableRows(body []byte) [][]string {
z := ghtml.NewTokenizer(strings.NewReader(string(body)))
tableCount := 0
targetDepth := 0 // >0 while inside the target table (handles nesting)
inThead := false
inRow := false
inCell := false
var currentRow []string
var cellBuf strings.Builder
var rows [][]string
for {
tt := z.Next()
if tt == ghtml.ErrorToken {
break
}
switch tt {
case ghtml.StartTagToken:
t := z.Token()
switch t.Data {
case "table":
if targetDepth > 0 {
targetDepth++ // nested table inside target; track so </table> doesn't exit early
} else if hasClass(t, "table") {
tableCount++
if tableCount == 2 {
targetDepth = 1
}
}
case "thead":
if targetDepth > 0 {
inThead = true
}
case "tr":
if targetDepth > 0 && !inThead {
inRow = true
currentRow = nil
}
case "td", "th":
if inRow {
inCell = true
cellBuf.Reset()
}
}
case ghtml.EndTagToken:
t := z.Token()
switch t.Data {
case "td", "th":
if inCell {
currentRow = append(currentRow, cellBuf.String())
inCell = false
}
case "thead":
inThead = false
case "tr":
if inRow {
if len(currentRow) > 0 {
rows = append(rows, currentRow)
}
inRow = false
}
case "table":
if targetDepth > 0 {
targetDepth--
if targetDepth == 0 {
return rows
}
}
}
case ghtml.TextToken:
if inCell {
cellBuf.WriteString(z.Token().Data)
}
}
}
return rows
}
func hasClass(t ghtml.Token, cls string) bool {
for _, a := range t.Attr {
if a.Key == "class" {
for _, c := range strings.Fields(a.Val) {
if c == cls {
return true
}
}
}
}
return false
}
// parseCzechDate parses Czech date strings → "YYYY-MM-DD".
// Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants
// with dot or slash separators, as the Fio transparent page omits leading zeros.
// Returns "" on parse error.
func parseCzechDate(s string) string {
s = strings.TrimSpace(s)
for _, layout := range []string{
"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006",
"2.1.06", "02.01.06", "2/1/06", "02/01/06",
} {
if t, err := time.Parse(layout, s); err == nil {
return t.Format("2006-01-02")
}
}
return ""
}
var nonNumericRe = regexp.MustCompile(`[^\d.,]`)
// parseCzechAmount parses "1 500,00 CZK" / "1.500,00" / "1500.00" → float64.
// Returns 0 on error.
func parseCzechAmount(s string) float64 {
// Remove NBSP, regular spaces, currency letters
s = strings.Map(func(r rune) rune {
if r == ' ' || unicode.IsSpace(r) || unicode.IsLetter(r) {
return -1
}
return r
}, s)
if strings.Contains(s, ",") {
// Czech decimal: 1.500,00 → remove dots (thousand sep), comma → dot
s = strings.ReplaceAll(s, ".", "")
s = strings.ReplaceAll(s, ",", ".")
} else {
// Remove any remaining non-numeric except one dot
s = nonNumericRe.ReplaceAllString(s, "")
}
var f float64
_, _ = fmt.Sscanf(s, "%f", &f)
return f
}