feat(go): IO layer behind interfaces (M4) #13

Merged
kacerr merged 4 commits from feat/m4-io-layer into main 2026-05-07 10:48:54 +02:00
3 changed files with 40 additions and 9 deletions
Showing only changes of commit fcb83691f5 - Show all commits

View File

@@ -95,6 +95,8 @@ func TestParseCzechDate(t *testing.T) {
cases := []struct{ in, want string }{
{"10.04.2026", "2026-04-10"},
{"10/04/2026", "2026-04-10"},
{"7.5.2026", "2026-05-07"}, // non-padded — real Fio transparent page format
{"3.12.2025", "2025-12-03"}, // non-padded single-digit day, double-digit month
{"", ""},
{"invalid", ""},
}
@@ -105,6 +107,22 @@ func TestParseCzechDate(t *testing.T) {
}
}
// TestExtractSecondTableRows_NestedTable is a regression test: a <table>
// nested inside a cell of the target table must not make the extractor
// return before the remaining rows of the target table have been read.
func TestExtractSecondTableRows_NestedTable(t *testing.T) {
	// First table is navigation; the second one is the target and holds
	// two data rows, the first of which embeds another table in a cell.
	const page = `<table class="table"><tr><td>nav</td></tr></table>
<table class="table">
<thead><tr><th>Date</th></tr></thead>
<tbody>
<tr><td>7.5.2026</td><td><table><tr><td>nested</td></tr></table></td></tr>
<tr><td>6.5.2026</td><td></td></tr>
</tbody>
</table>`

	got := extractSecondTableRows([]byte(page))
	if n := len(got); n != 2 {
		t.Errorf("want 2 data rows, got %d: %v", n, got)
	}
}
func TestParseCzechAmount(t *testing.T) {
cases := []struct {
in string

View File

@@ -91,11 +91,13 @@ func parseTransparentHTML(body []byte) ([]Transaction, error) {
// extractSecondTableRows walks the HTML token stream and returns data rows
// from the second <table class="table"> element, skipping the <thead>.
// It tracks nesting depth so that nested <table> elements inside the target
// do not trigger an early exit.
func extractSecondTableRows(body []byte) [][]string {
z := ghtml.NewTokenizer(strings.NewReader(string(body)))
tableCount := 0
inTarget := false
targetDepth := 0 // >0 while inside the target table (handles nesting)
inThead := false
inRow := false
inCell := false
@@ -113,18 +115,20 @@ func extractSecondTableRows(body []byte) [][]string {
t := z.Token()
switch t.Data {
case "table":
if hasClass(t, "table") {
if targetDepth > 0 {
targetDepth++ // nested table inside target; track so </table> doesn't exit early
} else if hasClass(t, "table") {
tableCount++
if tableCount == 2 {
inTarget = true
targetDepth = 1
}
}
case "thead":
if inTarget {
if targetDepth > 0 {
inThead = true
}
case "tr":
if inTarget && !inThead {
if targetDepth > 0 && !inThead {
inRow = true
currentRow = nil
}
@@ -152,8 +156,11 @@ func extractSecondTableRows(body []byte) [][]string {
inRow = false
}
case "table":
if inTarget {
return rows
if targetDepth > 0 {
targetDepth--
if targetDepth == 0 {
return rows
}
}
}
case ghtml.TextToken:
@@ -178,11 +185,13 @@ func hasClass(t ghtml.Token, cls string) bool {
return false
}
// parseCzechDate parses "DD.MM.YYYY" or "DD/MM/YYYY" → "YYYY-MM-DD".
// parseCzechDate parses Czech date strings → "YYYY-MM-DD".
// Handles both zero-padded ("07.05.2026") and non-padded ("7.5.2026") variants
// with dot or slash separators, as the Fio transparent page omits leading zeros.
// Returns "" on parse error.
func parseCzechDate(s string) string {
s = strings.TrimSpace(s)
for _, layout := range []string{"02.01.2006", "02/01/2006"} {
for _, layout := range []string{"2.1.2006", "02.01.2006", "2/1/2006", "02/01/2006"} {
if t, err := time.Parse(layout, s); err == nil {
return t.Format("2006-01-02")
}

View File

@@ -87,6 +87,10 @@ func SyncToSheets(
if err != nil {
return 0, fmt.Errorf("sync: fetch fio: %w", err)
}
if opts.DryRun {
fmt.Printf("Dry run: window %s to %s, fetched %d transaction(s) from Fio\n",
from.Format("2006-01-02"), to.Format("2006-01-02"), len(txns))
}
// 4. Append new rows.
var newRows [][]any