fuj-management/go/internal/io/fio/transparent.go

package fio

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strings"
	"time"
	"unicode"

	ghtml "golang.org/x/net/html"
)

// transparentClient fetches transactions from the Fio transparent account page (HTML).
// Ports scripts/fio_utils.py FioTableParser + fetch_transactions_transparent.
type transparentClient struct {
	// accountNum is interpolated verbatim into the "a" query parameter of the
	// transparent-page URL built by FetchTransactions.
	accountNum string
	// hc performs the HTTP request; FetchTransactions only ever calls its Do method.
	hc         httpDoer
}

// FetchTransactions downloads the Fio transparent-account page for the
// client's account and parses the transactions out of the returned HTML.
//
// ctx is attached to the outgoing request. from and to are sent as the "f"
// and "t" query parameters, formatted as D.M.YYYY without leading zeros.
//
// It returns an error when the request cannot be built or executed, when the
// server replies with a status other than 200 OK, or when the response body
// cannot be read. Underlying errors are wrapped with %w, so errors.Is and
// errors.As still see the original cause (e.g. context cancellation).
func (c *transparentClient) FetchTransactions(ctx context.Context, from, to time.Time) ([]Transaction, error) {
	// Transparent page date format: D.M.YYYY
	const dateLayout = "2.1.2006"

	// Named reqURL rather than url so the conventional net/url package name
	// is never shadowed inside this function.
	reqURL := fmt.Sprintf(
		"https://ib.fio.cz/ib/transparent?a=%s&f=%s&t=%s",
		c.accountNum,
		from.Format(dateLayout),
		to.Format(dateLayout),
	)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: building request: %w", err)
	}
	resp, err := c.hc.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: requesting page: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fio transparent: HTTP %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("fio transparent: reading response body: %w", err)
	}
	return parseTransparentHTML(body)
}

// Column indices in the transparent-page table (0-based).
// Datum | Částka | Typ | Název protiúčtu | Zpráva pro příjemce | KS | VS | SS | Poznámka
// (Date | Amount | Type | Counterparty name | Message for recipient | KS | VS | SS | Note)
//
// Columns 2 (Typ) and 8 (Poznámka) have no constant because nothing in this
// file reads them.
const (
	tColDate    = 0 // Datum
	tColAmount  = 1 // Částka
	tColSender  = 3 // Název protiúčtu
	tColMessage = 4 // Zpráva pro příjemce
	tColKS      = 5 // KS
	tColVS      = 6 // VS
	tColSS      = 7 // SS
)

// parseTransparentHTML turns the transparent-account HTML page into a slice
// of transactions.
//
// Rows come from extractSecondTableRows. A row is kept only when its date
// cell parses (parseCzechDate returns non-empty) and its amount cell parses to
// a value greater than zero; every other row is silently dropped. Cells that
// a short row does not have are treated as empty strings. The returned error
// is always nil; the result is a nil slice when no row qualifies.
func parseTransparentHTML(body []byte) ([]Transaction, error) {
	// cell yields the whitespace-trimmed text of column idx, or "" when the
	// row has fewer columns than that.
	cell := func(cells []string, idx int) string {
		if idx >= len(cells) {
			return ""
		}
		return strings.TrimSpace(cells[idx])
	}

	var result []Transaction
	for _, cells := range extractSecondTableRows(body) {
		date := parseCzechDate(cell(cells, tColDate))
		value := parseCzechAmount(cell(cells, tColAmount))
		if date == "" || value <= 0 {
			continue
		}

		txn := Transaction{
			Date:    date,
			Amount:  value,
			Sender:  cell(cells, tColSender),
			Message: cell(cells, tColMessage),
			KS:      cell(cells, tColKS),
			VS:      cell(cells, tColVS),
			SS:      cell(cells, tColSS),
			BankID:  "", // the HTML page does not expose a bank-side identifier
		}
		result = append(result, txn)
	}
	return result, nil
}

// extractSecondTableRows walks the HTML token stream and returns data rows
// from the second <table class="table"> element, skipping the <thead>.
// It tracks nesting depth so that nested <table> elements inside the target
// do not trigger an early exit.
//
// Only tables whose class attribute contains the word "table" are counted,
// and only while the walk is outside the target table. Each returned row is
// the list of its <td>/<th> cell texts, exactly as concatenated from the text
// tokens inside the cell (no trimming is done here). Rows that produced no
// cells are dropped. The function returns as soon as the target table's
// closing tag is seen; if the token stream ends first, it returns whatever
// rows were completed up to that point (nil when there were none).
//
// NOTE(review): the row/cell flags are not depth-aware. A <tr> inside a table
// nested in the target resets currentRow, so cells already collected for the
// enclosing row would be discarded. Presumably the real page has no nested
// tables; confirm before relying on the nesting support for anything beyond
// finding the correct closing </table>.
func extractSecondTableRows(body []byte) [][]string {
	z := ghtml.NewTokenizer(strings.NewReader(string(body)))

	tableCount := 0
	targetDepth := 0 // >0 while inside the target table (handles nesting)
	inThead := false
	inRow := false
	inCell := false
	var currentRow []string
	var cellBuf strings.Builder
	var rows [][]string

	for {
		tt := z.Next()
		// Stop on the tokenizer's error token; rows gathered so far are
		// returned after the loop.
		if tt == ghtml.ErrorToken {
			break
		}
		// Only start tags, end tags and text are handled; every other token
		// type (including self-closing tags) falls through untouched.
		switch tt {
		case ghtml.StartTagToken:
			t := z.Token()
			switch t.Data {
			case "table":
				if targetDepth > 0 {
					targetDepth++ // nested table inside target; track so </table> doesn't exit early
				} else if hasClass(t, "table") {
					// Count class="table" tables seen outside the target;
					// the second one becomes the target.
					tableCount++
					if tableCount == 2 {
						targetDepth = 1
					}
				}
			case "thead":
				if targetDepth > 0 {
					inThead = true
				}
			case "tr":
				// Header rows are skipped: a row only starts outside <thead>.
				if targetDepth > 0 && !inThead {
					inRow = true
					currentRow = nil
				}
			case "td", "th":
				if inRow {
					inCell = true
					cellBuf.Reset()
				}
			}
		case ghtml.EndTagToken:
			t := z.Token()
			switch t.Data {
			case "td", "th":
				// A cell is committed only on its closing tag.
				if inCell {
					currentRow = append(currentRow, cellBuf.String())
					inCell = false
				}
			case "thead":
				inThead = false
			case "tr":
				if inRow {
					// Rows without any committed cell are not recorded.
					if len(currentRow) > 0 {
						rows = append(rows, currentRow)
					}
					inRow = false
				}
			case "table":
				if targetDepth > 0 {
					targetDepth--
					// Depth back at zero means the target table itself just
					// closed, so the walk is finished.
					if targetDepth == 0 {
						return rows
					}
				}
			}
		case ghtml.TextToken:
			// Text from any descendant of the open cell is appended, so
			// inline markup inside a cell contributes its text as well.
			if inCell {
				cellBuf.WriteString(z.Token().Data)
			}
		}
	}
	return rows
}

// hasClass reports whether any class attribute on token t lists cls as one
// of its whitespace-separated class names. The comparison is exact and
// case-sensitive.
func hasClass(t ghtml.Token, cls string) bool {
	for _, attr := range t.Attr {
		if attr.Key != "class" {
			continue
		}
		for _, name := range strings.Fields(attr.Val) {
			if name == cls {
				return true
			}
		}
	}
	return false
}

// parseCzechDate converts a day-month-year date string into "YYYY-MM-DD".
//
// Surrounding whitespace is ignored. Day and month may be written with or
// without a leading zero ("07.05.2026" or "7.5.2026"), separated by either
// dots or slashes. Anything that matches none of the accepted layouts,
// including calendar-invalid dates, yields "".
func parseCzechDate(s string) string {
	trimmed := strings.TrimSpace(s)
	layouts := [...]string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"}
	for i := range layouts {
		parsed, err := time.Parse(layouts[i], trimmed)
		if err != nil {
			continue
		}
		return parsed.Format("2006-01-02")
	}
	return ""
}

// nonNumericRe matches every character that is not a digit, dot or comma.
var nonNumericRe = regexp.MustCompile(`[^\d.,]`)

// parseCzechAmount parses a Czech-formatted money string into a float64.
//
// Accepted shapes include "1 500,00 CZK", "1.500,00" and "1500.00". All
// whitespace (including no-break spaces) and all letters are removed first.
// When the remainder contains a comma, the comma is the decimal separator and
// dots are thousands separators. Otherwise a dot is taken as the decimal
// separator.
//
// A leading minus sign is honoured in both formats, and U+2212 MINUS SIGN is
// treated as '-'. Input that does not start with a parseable number yields 0.
func parseCzechAmount(s string) float64 {
	// Drop whitespace and currency letters; normalise the typographic minus
	// so the sign handling below only has to deal with ASCII '-'.
	s = strings.Map(func(r rune) rune {
		switch {
		case unicode.IsSpace(r), unicode.IsLetter(r):
			return -1
		case r == '\u2212':
			return '-'
		}
		return r
	}, s)

	if strings.Contains(s, ",") {
		// Czech decimal: 1.500,00 → remove dots (thousand sep), comma → dot
		s = strings.ReplaceAll(s, ".", "")
		s = strings.ReplaceAll(s, ",", ".")
	} else {
		// Strip everything except digits and dots. The regexp would also
		// remove a leading '-', turning a debit into a credit, so remember
		// the sign and put it back afterwards.
		negative := strings.HasPrefix(s, "-")
		s = nonNumericRe.ReplaceAllString(s, "")
		if negative {
			s = "-" + s
		}
	}

	var f float64
	// The scan error is deliberately ignored: on failure f keeps its zero
	// value, which is this function's documented error result.
	_, _ = fmt.Sscanf(s, "%f", &f)
	return f
}