package fio

import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"regexp"
	"strings"
	"time"
	"unicode"

	ghtml "golang.org/x/net/html"
)

// transparentClient fetches transactions from the Fio transparent account page (HTML).
// Ports scripts/fio_utils.py FioTableParser + fetch_transactions_transparent.
type transparentClient struct {
	accountNum string   // value of the "a" query parameter
	hc         httpDoer // performs the HTTP request
}

// FetchTransactions downloads the transparent-account page for the window
// [from, to] and returns the transactions parsed out of its HTML by
// parseTransparentHTML.
//
// A non-nil error is returned when the request cannot be built, when sending
// it fails, when the response status is not 200 OK, or when the body cannot
// be read. Underlying errors are wrapped with %w, so errors.Is / errors.As
// keep working (e.g. for context cancellation).
func (c *transparentClient) FetchTransactions(ctx context.Context, from, to time.Time) ([]Transaction, error) {
	// Transparent page date format: D.M.YYYY
	const pageDateLayout = "2.1.2006"

	// NOTE(review): accountNum is interpolated into the query string unescaped;
	// this assumes it only contains URL-safe characters — confirm at the caller.
	url := fmt.Sprintf(
		"https://ib.fio.cz/ib/transparent?a=%s&f=%s&t=%s",
		c.accountNum,
		from.Format(pageDateLayout),
		to.Format(pageDateLayout),
	)
	slog.Debug("fio transparent: GET", "url", url,
		"from", from.Format("2006-01-02"), "to", to.Format("2006-01-02"))

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: building request: %w", err)
	}

	resp, err := c.hc.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: sending request: %w", err)
	}
	defer resp.Body.Close()

	slog.Debug("fio transparent: response", "status", resp.StatusCode)
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fio transparent: HTTP %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: reading response body: %w", err)
	}
	slog.Debug("fio transparent: body read", "body_bytes", len(body))

	return parseTransparentHTML(body)
}

// Column indices in the transparent-page table (0-based).
// Datum | Částka | Typ | Název protiúčtu | Zpráva pro příjemce | KS | VS | SS | Poznámka const ( tColDate = 0 tColAmount = 1 tColSender = 3 tColMessage = 4 tColKS = 5 tColVS = 6 tColSS = 7 ) func parseTransparentHTML(body []byte) ([]Transaction, error) { rows := extractSecondTableRows(body) var txns []Transaction var droppedBadDate, droppedNonpositive int for _, row := range rows { col := func(i int) string { if i < len(row) { return strings.TrimSpace(row[i]) } return "" } dateStr := parseCzechDate(col(tColDate)) amount := parseCzechAmount(col(tColAmount)) if dateStr == "" { droppedBadDate++ continue } if amount <= 0 { droppedNonpositive++ continue } txns = append(txns, Transaction{ Date: dateStr, Amount: amount, Sender: col(tColSender), Message: col(tColMessage), KS: col(tColKS), VS: col(tColVS), SS: col(tColSS), BankID: "", // not available on HTML path }) } slog.Debug("fio transparent: parsed", "raw_rows", len(rows), "kept", len(txns), "dropped_bad_date", droppedBadDate, "dropped_nonpositive_amount", droppedNonpositive) return txns, nil } // extractSecondTableRows walks the HTML token stream and returns data rows // from the second element, skipping the . // It tracks nesting depth so that nested
elements inside the target // do not trigger an early exit. func extractSecondTableRows(body []byte) [][]string { z := ghtml.NewTokenizer(strings.NewReader(string(body))) tableCount := 0 targetDepth := 0 // >0 while inside the target table (handles nesting) inThead := false inRow := false inCell := false var currentRow []string var cellBuf strings.Builder var rows [][]string for { tt := z.Next() if tt == ghtml.ErrorToken { break } switch tt { case ghtml.StartTagToken: t := z.Token() switch t.Data { case "table": if targetDepth > 0 { targetDepth++ // nested table inside target; track so
doesn't exit early } else if hasClass(t, "table") { tableCount++ if tableCount == 2 { targetDepth = 1 } } case "thead": if targetDepth > 0 { inThead = true } case "tr": if targetDepth > 0 && !inThead { inRow = true currentRow = nil } case "td", "th": if inRow { inCell = true cellBuf.Reset() } } case ghtml.EndTagToken: t := z.Token() switch t.Data { case "td", "th": if inCell { currentRow = append(currentRow, cellBuf.String()) inCell = false } case "thead": inThead = false case "tr": if inRow { if len(currentRow) > 0 { rows = append(rows, currentRow) } inRow = false } case "table": if targetDepth > 0 { targetDepth-- if targetDepth == 0 { return rows } } } case ghtml.TextToken: if inCell { cellBuf.WriteString(z.Token().Data) } } } return rows } func hasClass(t ghtml.Token, cls string) bool { for _, a := range t.Attr { if a.Key == "class" { for _, c := range strings.Fields(a.Val) { if c == cls { return true } } } } return false } // parseCzechDate parses Czech date strings → "YYYY-MM-DD". // Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants // with dot or slash separators, as the Fio transparent page omits leading zeros. // Returns "" on parse error. func parseCzechDate(s string) string { s = strings.TrimSpace(s) for _, layout := range []string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"} { if t, err := time.Parse(layout, s); err == nil { return t.Format("2006-01-02") } } return "" } var nonNumericRe = regexp.MustCompile(`[^\d.,]`) // parseCzechAmount parses "1 500,00 CZK" / "1.500,00" / "1500.00" → float64. // Returns 0 on error. 
//
// Only ASCII digits, '.', ',' and a minus sign ahead of the first digit are
// significant; whitespace (NBSP included), currency letters and every other
// character are discarded. The minus sign may be ASCII '-' or U+2212 and makes
// the result negative regardless of which decimal style follows. When a comma
// is present it is the decimal separator and dots are thousands separators;
// without a comma a single dot is the decimal point, while several dots can
// only be thousands separators and are removed.
func parseCzechAmount(s string) float64 {
	var digits strings.Builder
	negative := false
	seenDigit := false
	for _, r := range s {
		switch {
		case unicode.IsSpace(r), unicode.IsLetter(r):
			// Grouping whitespace (incl. NBSP) and currency letters carry no value.
		case r >= '0' && r <= '9':
			seenDigit = true
			digits.WriteRune(r)
		case r == '.' || r == ',':
			digits.WriteRune(r)
		case r == '-' || r == '\u2212':
			// Only a sign in front of the number counts; a stray dash after
			// the digits is ignored.
			if !seenDigit {
				negative = true
			}
		}
	}

	num := digits.String()
	switch {
	case strings.Contains(num, ","):
		// Czech decimal: 1.500,00 → remove dots (thousand sep), comma → dot
		num = strings.ReplaceAll(num, ".", "")
		num = strings.ReplaceAll(num, ",", ".")
	case strings.Count(num, ".") > 1:
		// 1.500.000 → more than one dot cannot be a decimal point.
		num = strings.ReplaceAll(num, ".", "")
	}

	var f float64
	if _, err := fmt.Sscanf(num, "%f", &f); err != nil {
		return 0
	}
	if negative {
		return -f
	}
	return f
}