fix(python): parse Fio 2-digit-year dates + add make sync-debug
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
Build and Push / build (push) Successful in 6s
Build and Push / build-go (push) Successful in 59s

Fio's transparent page now serves recent rows as DD.MM.YY while older
rows stay DD.MM.YYYY. parse_czech_date only knew the 4-digit form so
every recent transfer was silently dropped — make sync-2026 reported
zero new transactions. Adds %d.%m.%y and %d/%m/%y to the format list,
mirroring the Go-side fix from 2026-05-07.

Also adds a Python analog of make go-sync-debug:
- --dry-run skips header write / append / sort and prints "would …" lines
- --print-fio-table prints aligned per-txn table with NEW/DUP status
- make sync-debug [DAYS=N] wrapper (default DAYS=30)
- always-on stderr diagnostics in fio_utils: which fetcher was chosen
  (with FIO_API_TOKEN-unset lag warning) + raw-vs-filtered counts, so
  this class of "scraper drops everything" bug surfaces immediately.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 22:56:49 +02:00
parent f25552eef2
commit aaa876e593
4 changed files with 126 additions and 31 deletions

View File

@@ -1,5 +1,11 @@
# Changelog
## 2026-05-11 22:56 CEST — fix(python): parse Fio 2-digit-year dates + add `make sync-debug` dry-run tool
- Fix: `scripts/fio_utils.py` `parse_czech_date` now accepts `DD.MM.YY` / `D.M.YY` in addition to the 4-digit-year variants. Fio's transparent page now mixes both forms in the same response; the 2-digit rows were being silently dropped, which caused `make sync-2026` to miss every recent transfer. Mirrors the Go-side fix from 2026-05-07 (CHANGELOG entry below).
- Added `--dry-run` and `--print-fio-table` flags to `scripts/sync_fio_to_sheets.py`, plus a `make sync-debug [DAYS=N]` Makefile target. Mirrors `make go-sync-debug`: fetches from Fio and dedupes against the sheet, prints `STATUS=NEW/DUP` per transaction, and prints per-row `Dry run: would append …` lines + `would sort by date` instead of touching the sheet.
- Added always-on stderr diagnostics in `scripts/fio_utils.py`: which fetcher was selected (authenticated API vs. transparent-page scraper with `FIO_API_TOKEN`-unset warning), and raw-vs-after-filter transaction counts on both paths — so this class of "scraper drops everything" bug surfaces immediately.
## 2026-05-08 14:55 CEST — feat(go): M6.6.1 — Pay-button QR popup modal
- Restored the Python `showPayQR` in-page modal UX that was lost in M6.6 (Pay buttons were navigating the tab to the raw `/qr` PNG).

View File

@@ -35,6 +35,7 @@ help:
@echo " make sync - Sync Fio transactions to Google Sheets"
@echo " make sync-2025 - Sync Fio transactions for Q4 2025 (Oct-Dec)"
@echo " make sync-2026 - Sync Fio transactions for the whole year of 2026"
@echo " make sync-debug [DAYS=N] - Dry-run Python sync with Fio diagnostics and txn table (default DAYS=30)"
@echo " make infer - Infer payment details (Person, Purpose, Amount) in the sheet"
@echo " make reconcile - Show balance report using Google Sheets data"
@echo " make venv - Sync virtual environment with pyproject.toml"
@@ -125,6 +126,9 @@ sync-2025: $(PYTHON)
sync-2026: $(PYTHON)
$(PYTHON) scripts/sync_fio_to_sheets.py --credentials .secret/fuj-management-bot-credentials.json --from 2026-01-01 --to 2026-12-31 --sort-by-date
sync-debug: $(PYTHON) ## Dry-run Python sync with Fio diagnostics and txn table (default DAYS=30)
$(PYTHON) scripts/sync_fio_to_sheets.py --credentials .secret/fuj-management-bot-credentials.json --days $(DAYS) --dry-run --print-fio-table
infer: $(PYTHON)
$(PYTHON) scripts/infer_payments.py --credentials $(CREDENTIALS)

View File

@@ -4,6 +4,7 @@
import json
import os
import re
import sys
import urllib.request
from datetime import datetime
from html.parser import HTMLParser
@@ -89,9 +90,11 @@ def parse_czech_amount(s: str) -> float | None:
def parse_czech_date(s: str) -> str | None:
"""Parse 'DD.MM.YYYY' to 'YYYY-MM-DD'."""
"""Parse a Czech date to 'YYYY-MM-DD'. Accepts 4-digit and 2-digit years
with dot or slash separators; Fio's transparent page mixes 'DD.MM.YYYY'
and 'DD.MM.YY' in the same response."""
s = s.strip()
for fmt in ("%d.%m.%Y", "%d/%m/%Y"):
for fmt in ("%d.%m.%Y", "%d/%m/%Y", "%d.%m.%y", "%d/%m/%y"):
try:
return datetime.strptime(s, fmt).strftime("%Y-%m-%d")
except ValueError:
@@ -146,6 +149,7 @@ def fetch_transactions_transparent(
"bank_id": "", # HTML scraping doesn't give stable ID
})
print(f"fio: transparent fetched {len(rows)} raw rows, {len(transactions)} transaction(s) after filtering", file=sys.stderr)
return transactions
@@ -169,7 +173,8 @@ def fetch_transactions_api(
transactions = []
tx_list = data.get("accountStatement", {}).get("transactionList", {})
for tx in (tx_list.get("transaction") or []):
raw_list = tx_list.get("transaction") or []
for tx in raw_list:
# Each field is {"value": ..., "name": ..., "id": ...} or null
def val(col_id):
col = tx.get(f"column{col_id}")
@@ -197,6 +202,7 @@ def fetch_transactions_api(
"currency": str(val(14) or "CZK"), # column14 = Currency
})
print(f"fio: api fetched {len(raw_list)} raw transaction(s), {len(transactions)} after filtering", file=sys.stderr)
return transactions
@@ -204,8 +210,14 @@ def fetch_transactions(date_from: str, date_to: str) -> list[dict]:
"""Fetch transactions, using API if token available, else transparent page."""
token = os.environ.get("FIO_API_TOKEN", "").strip()
if token:
print(f"fio: using authenticated API, window {date_from}..{date_to}", file=sys.stderr)
return fetch_transactions_api(token, date_from, date_to)
print(
f"fio: using transparent page (FIO_API_TOKEN unset — expect publishing lag), "
f"window {date_from}..{date_to}, account=2800359168",
file=sys.stderr,
)
# Convert YYYY-MM-DD to DD.MM.YYYY for the transparent page URL
from_dt = datetime.strptime(date_from, "%Y-%m-%d")
to_dt = datetime.strptime(date_to, "%Y-%m-%d")

View File

@@ -77,6 +77,35 @@ def generate_sync_id(tx: dict) -> str:
return hashlib.sha256(raw_str.encode("utf-8")).hexdigest()
def _trunc(s: str, n: int = 40) -> str:
    """Return ``s`` shortened to at most ``n`` characters for table display.

    Args:
        s: Value to shorten; it is converted with ``str()`` first.
        n: Maximum length of the returned string (default 40).

    Returns:
        ``str(s)`` unchanged when it already fits. Otherwise the first
        ``n - 1`` characters followed by a single ellipsis character, so the
        result is exactly ``n`` characters long and visibly marked as cut.
        An empty string when ``n`` is not positive.
    """
    s = str(s)
    if len(s) <= n:
        return s
    if n <= 0:
        # No room even for the marker; avoid s[: n - 1] slicing from the end.
        return ""
    # Written as an escape so the marker survives any source re-encoding.
    return s[: n - 1] + "\u2026"
def _print_fio_table(transactions: list[dict], statuses: list[str]) -> None:
    """Print the fetched transactions as a left-aligned text table on stdout.

    One header line is printed, followed by one line per transaction. Each
    column is padded to the width of its longest cell (header included) and
    columns are joined with a single space.

    Args:
        transactions: Transaction dicts; the keys ``date``, ``amount``,
            ``sender``, ``vs``, ``message`` and ``bank_id`` are read, with
            missing keys rendered as empty (``amount`` as ``0.00``).
        statuses: Status label for each transaction, paired positionally
            with ``transactions`` (pairing stops at the shorter list).
    """
    column_titles = ["DATE", "AMOUNT", "SENDER", "VS", "MESSAGE", "BANKID", "STATUS"]

    # Build every row up front so column widths can be measured before printing.
    table = []
    for txn, state in zip(transactions, statuses):
        amount_text = f"{float(txn.get('amount', 0)):.2f}"
        message_text = _trunc(str(txn.get("message", "")))
        table.append([
            str(txn.get("date", "")),
            amount_text,
            str(txn.get("sender", "")),
            str(txn.get("vs", "")),
            message_text,
            str(txn.get("bank_id", "")),
            state,
        ])

    # Start from the header widths and widen each column to its longest cell.
    col_widths = [len(title) for title in column_titles]
    for line in table:
        for col, cell in enumerate(line):
            col_widths[col] = max(col_widths[col], len(cell))

    def render(cells):
        padded = [text.ljust(width) for text, width in zip(cells, col_widths)]
        return " ".join(padded)

    print(render(column_titles))
    for line in table:
        print(render(line))
def sort_sheet_by_date(service, spreadsheet_id):
"""Sort the sheet by the Date column (Column B)."""
# Get the sheet ID (gid) of the first sheet
@@ -104,12 +133,21 @@ def sort_sheet_by_date(service, spreadsheet_id):
print("Sheet sorted by date.")
def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None, date_from_str: str = None, date_to_str: str = None, sort_by_date: bool = False):
def sync_to_sheets(
spreadsheet_id: str,
credentials_path: str,
days: int = None,
date_from_str: str = None,
date_to_str: str = None,
sort_by_date: bool = False,
dry_run: bool = False,
print_fio_table: bool = False,
):
print(f"Connecting to Google Sheets using {credentials_path}...")
service = get_sheets_service(credentials_path)
sheet = service.spreadsheets()
# 1. Fetch existing IDs from Column G (last column in A-G range)
# 1. Read existing sync IDs from Column K
print(f"Reading existing sync IDs from sheet...")
try:
result = sheet.values().get(
@@ -117,19 +155,22 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
range="A1:K" # Include header and all columns to check Sync ID
).execute()
values = result.get("values", [])
# Check and insert labels if missing
if not values or values[0] != COLUMN_LABELS:
print("Inserting column labels...")
sheet.values().update(
spreadsheetId=spreadsheet_id,
range="A1",
valueInputOption="USER_ENTERED",
body={"values": [COLUMN_LABELS]}
).execute()
if dry_run:
print("Dry run: would write header row")
else:
print("Inserting column labels...")
sheet.values().update(
spreadsheetId=spreadsheet_id,
range="A1",
valueInputOption="USER_ENTERED",
body={"values": [COLUMN_LABELS]}
).execute()
existing_ids = set()
else:
# Sync ID is now the last column (index 10)
# Sync ID is the last column (index 10)
existing_ids = {row[10] for row in values[1:] if len(row) > 10}
except Exception as e:
print(f"Error reading sheet (maybe empty?): {e}")
@@ -150,8 +191,12 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
transactions = fetch_transactions(df_str, dt_str)
print(f"Found {len(transactions)} transactions.")
# 3. Filter for new transactions
if dry_run:
print(f"Dry run: window {df_str} to {dt_str}, fetched {len(transactions)} transaction(s) from Fio")
# 3. Determine NEW/DUP for each transaction
new_rows = []
tx_statuses = []
for tx in transactions:
sync_id = generate_sync_id(tx)
if sync_id not in existing_ids:
@@ -169,24 +214,48 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
tx.get("bank_id", ""),
sync_id,
])
tx_statuses.append("NEW")
else:
tx_statuses.append("DUP")
# 4. Print table (before early-return so all transactions are shown including DUPs)
if print_fio_table and transactions:
_print_fio_table(transactions, tx_statuses)
if not new_rows:
print("No new transactions to sync.")
if dry_run:
print("Dry run: would sync 0 new transaction(s).")
else:
print("No new transactions to sync.")
return
# 4. Append to sheet
print(f"Appending {len(new_rows)} new transactions to the sheet...")
body = {"values": new_rows}
sheet.values().append(
spreadsheetId=spreadsheet_id,
range="A2", # Appends to the end of the sheet
valueInputOption="USER_ENTERED",
body=body
).execute()
print("Sync completed successfully.")
if sort_by_date:
sort_sheet_by_date(service, spreadsheet_id)
# 5. Append to sheet or print dry-run would-write lines
if dry_run:
for tx, status in zip(transactions, tx_statuses):
if status == "NEW":
print(
f"Dry run: would append"
f" date={tx.get('date', '')}"
f" amount={tx.get('amount', '')}"
f" sender={tx.get('sender', '')}"
f" vs={tx.get('vs', '')}"
f" message={tx.get('message', '')}"
)
if sort_by_date:
print("Dry run: would sort by date")
print(f"Dry run: would sync {len(new_rows)} new transaction(s).")
else:
print(f"Appending {len(new_rows)} new transactions to the sheet...")
body = {"values": new_rows}
sheet.values().append(
spreadsheetId=spreadsheet_id,
range="A2", # Appends to the end of the sheet
valueInputOption="USER_ENTERED",
body=body
).execute()
print("Sync completed successfully.")
if sort_by_date:
sort_sheet_by_date(service, spreadsheet_id)
def main():
@@ -197,16 +266,20 @@ def main():
parser.add_argument("--from", dest="date_from", help="Start date YYYY-MM-DD")
parser.add_argument("--to", dest="date_to", help="End date YYYY-MM-DD")
parser.add_argument("--sort-by-date", action="store_true", help="Sort the sheet by date after sync")
parser.add_argument("--dry-run", action="store_true", help="Fetch and dedup without writing to the sheet")
parser.add_argument("--print-fio-table", action="store_true", help="Print aligned table of all fetched transactions with NEW/DUP status (use with --dry-run)")
args = parser.parse_args()
try:
sync_to_sheets(
spreadsheet_id=args.sheet_id,
spreadsheet_id=args.sheet_id,
credentials_path=args.credentials,
days=args.days,
date_from_str=args.date_from,
date_to_str=args.date_to,
sort_by_date=args.sort_by_date
sort_by_date=args.sort_by_date,
dry_run=args.dry_run,
print_fio_table=args.print_fio_table,
)
except Exception as e:
print(f"Sync failed: {e}")