fix(python): parse Fio 2-digit-year dates + add make sync-debug
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
Build and Push / build (push) Successful in 6s
Build and Push / build-go (push) Successful in 59s

Fio's transparent page now serves recent rows as DD.MM.YY, while older
rows stay DD.MM.YYYY. parse_czech_date only recognized the 4-digit form,
so every recent transfer was silently dropped and make sync-2026
reported zero new transactions. This commit adds %d.%m.%y and %d/%m/%y
to the format list, mirroring the Go-side fix from 2026-05-07.

Also adds a Python analog of make go-sync-debug:
- --dry-run skips the header write, append, and sort steps and prints
  "would …" lines
- --print-fio-table prints an aligned per-transaction table with NEW/DUP
  status
- make sync-debug [DAYS=N] wrapper (default DAYS=30)
- always-on stderr diagnostics in fio_utils: which fetcher was chosen
  (with a publishing-lag warning when FIO_API_TOKEN is unset) plus
  raw-vs-filtered counts, so this class of "scraper drops everything"
  bug surfaces immediately.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 22:56:49 +02:00
parent f25552eef2
commit aaa876e593
4 changed files with 126 additions and 31 deletions

View File

@@ -4,6 +4,7 @@
import json
import os
import re
import sys
import urllib.request
from datetime import datetime
from html.parser import HTMLParser
@@ -89,9 +90,11 @@ def parse_czech_amount(s: str) -> float | None:
def parse_czech_date(s: str) -> str | None:
"""Parse 'DD.MM.YYYY' to 'YYYY-MM-DD'."""
"""Parse a Czech date to 'YYYY-MM-DD'. Accepts 4-digit and 2-digit years
with dot or slash separators; Fio's transparent page mixes 'DD.MM.YYYY'
and 'DD.MM.YY' in the same response."""
s = s.strip()
for fmt in ("%d.%m.%Y", "%d/%m/%Y"):
for fmt in ("%d.%m.%Y", "%d/%m/%Y", "%d.%m.%y", "%d/%m/%y"):
try:
return datetime.strptime(s, fmt).strftime("%Y-%m-%d")
except ValueError:
@@ -146,6 +149,7 @@ def fetch_transactions_transparent(
"bank_id": "", # HTML scraping doesn't give stable ID
})
print(f"fio: transparent fetched {len(rows)} raw rows, {len(transactions)} transaction(s) after filtering", file=sys.stderr)
return transactions
@@ -169,7 +173,8 @@ def fetch_transactions_api(
transactions = []
tx_list = data.get("accountStatement", {}).get("transactionList", {})
for tx in (tx_list.get("transaction") or []):
raw_list = tx_list.get("transaction") or []
for tx in raw_list:
# Each field is {"value": ..., "name": ..., "id": ...} or null
def val(col_id):
col = tx.get(f"column{col_id}")
@@ -197,6 +202,7 @@ def fetch_transactions_api(
"currency": str(val(14) or "CZK"), # column14 = Currency
})
print(f"fio: api fetched {len(raw_list)} raw transaction(s), {len(transactions)} after filtering", file=sys.stderr)
return transactions
@@ -204,8 +210,14 @@ def fetch_transactions(date_from: str, date_to: str) -> list[dict]:
"""Fetch transactions, using API if token available, else transparent page."""
token = os.environ.get("FIO_API_TOKEN", "").strip()
if token:
print(f"fio: using authenticated API, window {date_from}..{date_to}", file=sys.stderr)
return fetch_transactions_api(token, date_from, date_to)
print(
f"fio: using transparent page (FIO_API_TOKEN unset — expect publishing lag), "
f"window {date_from}..{date_to}, account=2800359168",
file=sys.stderr,
)
# Convert YYYY-MM-DD to DD.MM.YYYY for the transparent page URL
from_dt = datetime.strptime(date_from, "%Y-%m-%d")
to_dt = datetime.strptime(date_to, "%Y-%m-%d")