fix(python): parse Fio 2-digit-year dates + add make sync-debug
All checks were successful
Deploy to K8s / deploy (push) Successful in 8s
Build and Push / build (push) Successful in 6s
Build and Push / build-go (push) Successful in 59s

Fio's transparent page now serves recent rows as DD.MM.YY while older
rows stay DD.MM.YYYY. parse_czech_date only knew the 4-digit form so
every recent transfer was silently dropped — make sync-2026 reported
zero new transactions. Adds %d.%m.%y and %d/%m/%y to the format list,
mirroring the Go-side fix from 2026-05-07.

Also adds a Python analog of make go-sync-debug:
- --dry-run skips header write / append / sort and prints "would …" lines
- --print-fio-table prints aligned per-txn table with NEW/DUP status
- make sync-debug [DAYS=N] wrapper (default DAYS=30)
- always-on stderr diagnostics in fio_utils: which fetcher was chosen
  (with FIO_API_TOKEN-unset lag warning) + raw-vs-filtered counts, so
  this class of "scraper drops everything" bug surfaces immediately.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 22:56:49 +02:00
parent f25552eef2
commit aaa876e593
4 changed files with 126 additions and 31 deletions

View File

@@ -1,5 +1,11 @@
# Changelog # Changelog
## 2026-05-11 22:56 CEST — fix(python): parse Fio 2-digit-year dates + add `make sync-debug` dry-run tool
- Fix: `scripts/fio_utils.py` `parse_czech_date` now accepts `DD.MM.YY` / `D.M.YY` in addition to the 4-digit-year variants. Fio's transparent page now mixes both forms in the same response; the 2-digit rows were being silently dropped, which caused `make sync-2026` to miss every recent transfer. Mirrors the Go-side fix from 2026-05-07 (CHANGELOG entry below).
- Added `--dry-run` and `--print-fio-table` flags to `scripts/sync_fio_to_sheets.py`, plus a `make sync-debug [DAYS=N]` Makefile target. Mirrors `make go-sync-debug`: fetches from Fio and dedupes against the sheet, prints `STATUS=NEW/DUP` per transaction, and prints per-row `Dry run: would append …` lines + `would sort by date` instead of touching the sheet.
- Added always-on stderr diagnostics in `scripts/fio_utils.py`: which fetcher was selected (authenticated API vs. transparent-page scraper with `FIO_API_TOKEN`-unset warning), and raw-vs-after-filter transaction counts on both paths — so this class of "scraper drops everything" bug surfaces immediately.
## 2026-05-08 14:55 CEST — feat(go): M6.6.1 — Pay-button QR popup modal ## 2026-05-08 14:55 CEST — feat(go): M6.6.1 — Pay-button QR popup modal
- Restored the Python `showPayQR` in-page modal UX that was lost in M6.6 (Pay buttons were navigating the tab to the raw `/qr` PNG). - Restored the Python `showPayQR` in-page modal UX that was lost in M6.6 (Pay buttons were navigating the tab to the raw `/qr` PNG).

View File

@@ -35,6 +35,7 @@ help:
@echo " make sync - Sync Fio transactions to Google Sheets" @echo " make sync - Sync Fio transactions to Google Sheets"
@echo " make sync-2025 - Sync Fio transactions for Q4 2025 (Oct-Dec)" @echo " make sync-2025 - Sync Fio transactions for Q4 2025 (Oct-Dec)"
@echo " make sync-2026 - Sync Fio transactions for the whole year of 2026" @echo " make sync-2026 - Sync Fio transactions for the whole year of 2026"
@echo " make sync-debug [DAYS=N] - Dry-run Python sync with Fio diagnostics and txn table (default DAYS=30)"
@echo " make infer - Infer payment details (Person, Purpose, Amount) in the sheet" @echo " make infer - Infer payment details (Person, Purpose, Amount) in the sheet"
@echo " make reconcile - Show balance report using Google Sheets data" @echo " make reconcile - Show balance report using Google Sheets data"
@echo " make venv - Sync virtual environment with pyproject.toml" @echo " make venv - Sync virtual environment with pyproject.toml"
@@ -125,6 +126,9 @@ sync-2025: $(PYTHON)
sync-2026: $(PYTHON) sync-2026: $(PYTHON)
$(PYTHON) scripts/sync_fio_to_sheets.py --credentials .secret/fuj-management-bot-credentials.json --from 2026-01-01 --to 2026-12-31 --sort-by-date $(PYTHON) scripts/sync_fio_to_sheets.py --credentials .secret/fuj-management-bot-credentials.json --from 2026-01-01 --to 2026-12-31 --sort-by-date
sync-debug: $(PYTHON) ## Dry-run Python sync with Fio diagnostics and txn table (default DAYS=30)
$(PYTHON) scripts/sync_fio_to_sheets.py --credentials .secret/fuj-management-bot-credentials.json --days $(DAYS) --dry-run --print-fio-table
infer: $(PYTHON) infer: $(PYTHON)
$(PYTHON) scripts/infer_payments.py --credentials $(CREDENTIALS) $(PYTHON) scripts/infer_payments.py --credentials $(CREDENTIALS)

View File

@@ -4,6 +4,7 @@
import json import json
import os import os
import re import re
import sys
import urllib.request import urllib.request
from datetime import datetime from datetime import datetime
from html.parser import HTMLParser from html.parser import HTMLParser
@@ -89,9 +90,11 @@ def parse_czech_amount(s: str) -> float | None:
def parse_czech_date(s: str) -> str | None: def parse_czech_date(s: str) -> str | None:
"""Parse 'DD.MM.YYYY' to 'YYYY-MM-DD'.""" """Parse a Czech date to 'YYYY-MM-DD'. Accepts 4-digit and 2-digit years
with dot or slash separators; Fio's transparent page mixes 'DD.MM.YYYY'
and 'DD.MM.YY' in the same response."""
s = s.strip() s = s.strip()
for fmt in ("%d.%m.%Y", "%d/%m/%Y"): for fmt in ("%d.%m.%Y", "%d/%m/%Y", "%d.%m.%y", "%d/%m/%y"):
try: try:
return datetime.strptime(s, fmt).strftime("%Y-%m-%d") return datetime.strptime(s, fmt).strftime("%Y-%m-%d")
except ValueError: except ValueError:
@@ -146,6 +149,7 @@ def fetch_transactions_transparent(
"bank_id": "", # HTML scraping doesn't give stable ID "bank_id": "", # HTML scraping doesn't give stable ID
}) })
print(f"fio: transparent fetched {len(rows)} raw rows, {len(transactions)} transaction(s) after filtering", file=sys.stderr)
return transactions return transactions
@@ -169,7 +173,8 @@ def fetch_transactions_api(
transactions = [] transactions = []
tx_list = data.get("accountStatement", {}).get("transactionList", {}) tx_list = data.get("accountStatement", {}).get("transactionList", {})
for tx in (tx_list.get("transaction") or []): raw_list = tx_list.get("transaction") or []
for tx in raw_list:
# Each field is {"value": ..., "name": ..., "id": ...} or null # Each field is {"value": ..., "name": ..., "id": ...} or null
def val(col_id): def val(col_id):
col = tx.get(f"column{col_id}") col = tx.get(f"column{col_id}")
@@ -197,6 +202,7 @@ def fetch_transactions_api(
"currency": str(val(14) or "CZK"), # column14 = Currency "currency": str(val(14) or "CZK"), # column14 = Currency
}) })
print(f"fio: api fetched {len(raw_list)} raw transaction(s), {len(transactions)} after filtering", file=sys.stderr)
return transactions return transactions
@@ -204,8 +210,14 @@ def fetch_transactions(date_from: str, date_to: str) -> list[dict]:
"""Fetch transactions, using API if token available, else transparent page.""" """Fetch transactions, using API if token available, else transparent page."""
token = os.environ.get("FIO_API_TOKEN", "").strip() token = os.environ.get("FIO_API_TOKEN", "").strip()
if token: if token:
print(f"fio: using authenticated API, window {date_from}..{date_to}", file=sys.stderr)
return fetch_transactions_api(token, date_from, date_to) return fetch_transactions_api(token, date_from, date_to)
print(
f"fio: using transparent page (FIO_API_TOKEN unset — expect publishing lag), "
f"window {date_from}..{date_to}, account=2800359168",
file=sys.stderr,
)
# Convert YYYY-MM-DD to DD.MM.YYYY for the transparent page URL # Convert YYYY-MM-DD to DD.MM.YYYY for the transparent page URL
from_dt = datetime.strptime(date_from, "%Y-%m-%d") from_dt = datetime.strptime(date_from, "%Y-%m-%d")
to_dt = datetime.strptime(date_to, "%Y-%m-%d") to_dt = datetime.strptime(date_to, "%Y-%m-%d")

View File

@@ -77,6 +77,35 @@ def generate_sync_id(tx: dict) -> str:
return hashlib.sha256(raw_str.encode("utf-8")).hexdigest() return hashlib.sha256(raw_str.encode("utf-8")).hexdigest()
def _trunc(s: str, n: int = 40) -> str:
s = str(s)
return s if len(s) <= n else s[: n - 1] + ""
def _print_fio_table(transactions: list[dict], statuses: list[str]) -> None:
    """Print transactions and their statuses as a left-aligned text table.

    One header line is printed, followed by one line per
    (transaction, status) pair. Every column is padded to the width of its
    widest cell (or its header, whichever is longer).

    Args:
        transactions: Transaction dicts; the keys ``date``, ``amount``,
            ``sender``, ``vs``, ``message`` and ``bank_id`` are read, each
            with an empty/zero fallback when missing.
        statuses: Status label for each transaction, paired positionally
            with ``transactions``.
    """
    headers = ["DATE", "AMOUNT", "SENDER", "VS", "MESSAGE", "BANKID", "STATUS"]

    # Build every cell as text up front so widths can be measured.
    table = []
    for entry, label in zip(transactions, statuses):
        table.append([
            str(entry.get("date", "")),
            f"{float(entry.get('amount', 0)):.2f}",
            str(entry.get("sender", "")),
            str(entry.get("vs", "")),
            _trunc(str(entry.get("message", ""))),
            str(entry.get("bank_id", "")),
            label,
        ])

    # Start from the header widths and grow to fit the widest cell.
    widths = [len(title) for title in headers]
    for line in table:
        for col, cell in enumerate(line):
            if len(cell) > widths[col]:
                widths[col] = len(cell)

    sep = " "

    def render(cells):
        # Pad each cell to its column width and join with the separator.
        return sep.join(text.ljust(width) for text, width in zip(cells, widths))

    print(render(headers))
    for line in table:
        print(render(line))
def sort_sheet_by_date(service, spreadsheet_id): def sort_sheet_by_date(service, spreadsheet_id):
"""Sort the sheet by the Date column (Column B).""" """Sort the sheet by the Date column (Column B)."""
# Get the sheet ID (gid) of the first sheet # Get the sheet ID (gid) of the first sheet
@@ -104,12 +133,21 @@ def sort_sheet_by_date(service, spreadsheet_id):
print("Sheet sorted by date.") print("Sheet sorted by date.")
def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None, date_from_str: str = None, date_to_str: str = None, sort_by_date: bool = False): def sync_to_sheets(
spreadsheet_id: str,
credentials_path: str,
days: int = None,
date_from_str: str = None,
date_to_str: str = None,
sort_by_date: bool = False,
dry_run: bool = False,
print_fio_table: bool = False,
):
print(f"Connecting to Google Sheets using {credentials_path}...") print(f"Connecting to Google Sheets using {credentials_path}...")
service = get_sheets_service(credentials_path) service = get_sheets_service(credentials_path)
sheet = service.spreadsheets() sheet = service.spreadsheets()
# 1. Fetch existing IDs from Column G (last column in A-G range) # 1. Read existing sync IDs from Column K
print(f"Reading existing sync IDs from sheet...") print(f"Reading existing sync IDs from sheet...")
try: try:
result = sheet.values().get( result = sheet.values().get(
@@ -117,19 +155,22 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
range="A1:K" # Include header and all columns to check Sync ID range="A1:K" # Include header and all columns to check Sync ID
).execute() ).execute()
values = result.get("values", []) values = result.get("values", [])
# Check and insert labels if missing # Check and insert labels if missing
if not values or values[0] != COLUMN_LABELS: if not values or values[0] != COLUMN_LABELS:
print("Inserting column labels...") if dry_run:
sheet.values().update( print("Dry run: would write header row")
spreadsheetId=spreadsheet_id, else:
range="A1", print("Inserting column labels...")
valueInputOption="USER_ENTERED", sheet.values().update(
body={"values": [COLUMN_LABELS]} spreadsheetId=spreadsheet_id,
).execute() range="A1",
valueInputOption="USER_ENTERED",
body={"values": [COLUMN_LABELS]}
).execute()
existing_ids = set() existing_ids = set()
else: else:
# Sync ID is now the last column (index 10) # Sync ID is the last column (index 10)
existing_ids = {row[10] for row in values[1:] if len(row) > 10} existing_ids = {row[10] for row in values[1:] if len(row) > 10}
except Exception as e: except Exception as e:
print(f"Error reading sheet (maybe empty?): {e}") print(f"Error reading sheet (maybe empty?): {e}")
@@ -150,8 +191,12 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
transactions = fetch_transactions(df_str, dt_str) transactions = fetch_transactions(df_str, dt_str)
print(f"Found {len(transactions)} transactions.") print(f"Found {len(transactions)} transactions.")
# 3. Filter for new transactions if dry_run:
print(f"Dry run: window {df_str} to {dt_str}, fetched {len(transactions)} transaction(s) from Fio")
# 3. Determine NEW/DUP for each transaction
new_rows = [] new_rows = []
tx_statuses = []
for tx in transactions: for tx in transactions:
sync_id = generate_sync_id(tx) sync_id = generate_sync_id(tx)
if sync_id not in existing_ids: if sync_id not in existing_ids:
@@ -169,24 +214,48 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
tx.get("bank_id", ""), tx.get("bank_id", ""),
sync_id, sync_id,
]) ])
tx_statuses.append("NEW")
else:
tx_statuses.append("DUP")
# 4. Print table (before early-return so all transactions are shown including DUPs)
if print_fio_table and transactions:
_print_fio_table(transactions, tx_statuses)
if not new_rows: if not new_rows:
print("No new transactions to sync.") if dry_run:
print("Dry run: would sync 0 new transaction(s).")
else:
print("No new transactions to sync.")
return return
# 4. Append to sheet # 5. Append to sheet or print dry-run would-write lines
print(f"Appending {len(new_rows)} new transactions to the sheet...") if dry_run:
body = {"values": new_rows} for tx, status in zip(transactions, tx_statuses):
sheet.values().append( if status == "NEW":
spreadsheetId=spreadsheet_id, print(
range="A2", # Appends to the end of the sheet f"Dry run: would append"
valueInputOption="USER_ENTERED", f" date={tx.get('date', '')}"
body=body f" amount={tx.get('amount', '')}"
).execute() f" sender={tx.get('sender', '')}"
print("Sync completed successfully.") f" vs={tx.get('vs', '')}"
f" message={tx.get('message', '')}"
if sort_by_date: )
sort_sheet_by_date(service, spreadsheet_id) if sort_by_date:
print("Dry run: would sort by date")
print(f"Dry run: would sync {len(new_rows)} new transaction(s).")
else:
print(f"Appending {len(new_rows)} new transactions to the sheet...")
body = {"values": new_rows}
sheet.values().append(
spreadsheetId=spreadsheet_id,
range="A2", # Appends to the end of the sheet
valueInputOption="USER_ENTERED",
body=body
).execute()
print("Sync completed successfully.")
if sort_by_date:
sort_sheet_by_date(service, spreadsheet_id)
def main(): def main():
@@ -197,16 +266,20 @@ def main():
parser.add_argument("--from", dest="date_from", help="Start date YYYY-MM-DD") parser.add_argument("--from", dest="date_from", help="Start date YYYY-MM-DD")
parser.add_argument("--to", dest="date_to", help="End date YYYY-MM-DD") parser.add_argument("--to", dest="date_to", help="End date YYYY-MM-DD")
parser.add_argument("--sort-by-date", action="store_true", help="Sort the sheet by date after sync") parser.add_argument("--sort-by-date", action="store_true", help="Sort the sheet by date after sync")
parser.add_argument("--dry-run", action="store_true", help="Fetch and dedup without writing to the sheet")
parser.add_argument("--print-fio-table", action="store_true", help="Print aligned table of all fetched transactions with NEW/DUP status (use with --dry-run)")
args = parser.parse_args() args = parser.parse_args()
try: try:
sync_to_sheets( sync_to_sheets(
spreadsheet_id=args.sheet_id, spreadsheet_id=args.sheet_id,
credentials_path=args.credentials, credentials_path=args.credentials,
days=args.days, days=args.days,
date_from_str=args.date_from, date_from_str=args.date_from,
date_to_str=args.date_to, date_to_str=args.date_to,
sort_by_date=args.sort_by_date sort_by_date=args.sort_by_date,
dry_run=args.dry_run,
print_fio_table=args.print_fio_table,
) )
except Exception as e: except Exception as e:
print(f"Sync failed: {e}") print(f"Sync failed: {e}")