fix(python): parse Fio 2-digit-year dates + add make sync-debug
Fio's transparent page now serves recent rows as DD.MM.YY while older rows stay DD.MM.YYYY. parse_czech_date only knew the 4-digit form, so every recent transfer was silently dropped — make sync-2026 reported zero new transactions. Adds %d.%m.%y and %d/%m/%y to the format list, mirroring the Go-side fix from 2026-05-07.

Also adds a Python analog of make go-sync-debug:
- --dry-run skips header write / append / sort and prints "would …" lines
- --print-fio-table prints aligned per-txn table with NEW/DUP status
- make sync-debug [DAYS=N] wrapper (default DAYS=30)
- always-on stderr diagnostics in fio_utils: which fetcher was chosen (with FIO_API_TOKEN-unset lag warning) + raw-vs-filtered counts, so this class of "scraper drops everything" bug surfaces immediately.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,11 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-05-11 22:56 CEST — fix(python): parse Fio 2-digit-year dates + add `make sync-debug` dry-run tool
|
||||
|
||||
- Fix: `scripts/fio_utils.py` `parse_czech_date` now accepts `DD.MM.YY` / `D.M.YY` in addition to the 4-digit-year variants. Fio's transparent page now mixes both forms in the same response; the 2-digit rows were being silently dropped, which caused `make sync-2026` to miss every recent transfer. Mirrors the Go-side fix from 2026-05-07 (CHANGELOG entry below).
|
||||
- Added `--dry-run` and `--print-fio-table` flags to `scripts/sync_fio_to_sheets.py`, plus a `make sync-debug [DAYS=N]` Makefile target. Mirrors `make go-sync-debug`: fetches from Fio and dedupes against the sheet, prints `STATUS=NEW/DUP` per transaction, and prints per-row `Dry run: would append …` lines + `would sort by date` instead of touching the sheet.
|
||||
- Added always-on stderr diagnostics in `scripts/fio_utils.py`: which fetcher was selected (authenticated API vs. transparent-page scraper with `FIO_API_TOKEN`-unset warning), and raw-vs-after-filter transaction counts on both paths — so this class of "scraper drops everything" bug surfaces immediately.
|
||||
|
||||
## 2026-05-08 14:55 CEST — feat(go): M6.6.1 — Pay-button QR popup modal
|
||||
|
||||
- Restored the Python `showPayQR` in-page modal UX that was lost in M6.6 (Pay buttons were navigating the tab to the raw `/qr` PNG).
|
||||
|
||||
4
Makefile
4
Makefile
@@ -35,6 +35,7 @@ help:
|
||||
@echo " make sync - Sync Fio transactions to Google Sheets"
|
||||
@echo " make sync-2025 - Sync Fio transactions for Q4 2025 (Oct-Dec)"
|
||||
@echo " make sync-2026 - Sync Fio transactions for the whole year of 2026"
|
||||
@echo " make sync-debug [DAYS=N] - Dry-run Python sync with Fio diagnostics and txn table (default DAYS=30)"
|
||||
@echo " make infer - Infer payment details (Person, Purpose, Amount) in the sheet"
|
||||
@echo " make reconcile - Show balance report using Google Sheets data"
|
||||
@echo " make venv - Sync virtual environment with pyproject.toml"
|
||||
@@ -125,6 +126,9 @@ sync-2025: $(PYTHON)
|
||||
sync-2026: $(PYTHON)
|
||||
$(PYTHON) scripts/sync_fio_to_sheets.py --credentials .secret/fuj-management-bot-credentials.json --from 2026-01-01 --to 2026-12-31 --sort-by-date
|
||||
|
||||
sync-debug: $(PYTHON) ## Dry-run Python sync with Fio diagnostics and txn table (default DAYS=30)
|
||||
$(PYTHON) scripts/sync_fio_to_sheets.py --credentials .secret/fuj-management-bot-credentials.json --days $(DAYS) --dry-run --print-fio-table
|
||||
|
||||
infer: $(PYTHON)
|
||||
$(PYTHON) scripts/infer_payments.py --credentials $(CREDENTIALS)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime
|
||||
from html.parser import HTMLParser
|
||||
@@ -89,9 +90,11 @@ def parse_czech_amount(s: str) -> float | None:
|
||||
|
||||
|
||||
def parse_czech_date(s: str) -> str | None:
|
||||
"""Parse 'DD.MM.YYYY' to 'YYYY-MM-DD'."""
|
||||
"""Parse a Czech date to 'YYYY-MM-DD'. Accepts 4-digit and 2-digit years
|
||||
with dot or slash separators; Fio's transparent page mixes 'DD.MM.YYYY'
|
||||
and 'DD.MM.YY' in the same response."""
|
||||
s = s.strip()
|
||||
for fmt in ("%d.%m.%Y", "%d/%m/%Y"):
|
||||
for fmt in ("%d.%m.%Y", "%d/%m/%Y", "%d.%m.%y", "%d/%m/%y"):
|
||||
try:
|
||||
return datetime.strptime(s, fmt).strftime("%Y-%m-%d")
|
||||
except ValueError:
|
||||
@@ -146,6 +149,7 @@ def fetch_transactions_transparent(
|
||||
"bank_id": "", # HTML scraping doesn't give stable ID
|
||||
})
|
||||
|
||||
print(f"fio: transparent fetched {len(rows)} raw rows, {len(transactions)} transaction(s) after filtering", file=sys.stderr)
|
||||
return transactions
|
||||
|
||||
|
||||
@@ -169,7 +173,8 @@ def fetch_transactions_api(
|
||||
|
||||
transactions = []
|
||||
tx_list = data.get("accountStatement", {}).get("transactionList", {})
|
||||
for tx in (tx_list.get("transaction") or []):
|
||||
raw_list = tx_list.get("transaction") or []
|
||||
for tx in raw_list:
|
||||
# Each field is {"value": ..., "name": ..., "id": ...} or null
|
||||
def val(col_id):
|
||||
col = tx.get(f"column{col_id}")
|
||||
@@ -197,6 +202,7 @@ def fetch_transactions_api(
|
||||
"currency": str(val(14) or "CZK"), # column14 = Currency
|
||||
})
|
||||
|
||||
print(f"fio: api fetched {len(raw_list)} raw transaction(s), {len(transactions)} after filtering", file=sys.stderr)
|
||||
return transactions
|
||||
|
||||
|
||||
@@ -204,8 +210,14 @@ def fetch_transactions(date_from: str, date_to: str) -> list[dict]:
|
||||
"""Fetch transactions, using API if token available, else transparent page."""
|
||||
token = os.environ.get("FIO_API_TOKEN", "").strip()
|
||||
if token:
|
||||
print(f"fio: using authenticated API, window {date_from}..{date_to}", file=sys.stderr)
|
||||
return fetch_transactions_api(token, date_from, date_to)
|
||||
|
||||
print(
|
||||
f"fio: using transparent page (FIO_API_TOKEN unset — expect publishing lag), "
|
||||
f"window {date_from}..{date_to}, account=2800359168",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# Convert YYYY-MM-DD to DD.MM.YYYY for the transparent page URL
|
||||
from_dt = datetime.strptime(date_from, "%Y-%m-%d")
|
||||
to_dt = datetime.strptime(date_to, "%Y-%m-%d")
|
||||
|
||||
@@ -77,6 +77,35 @@ def generate_sync_id(tx: dict) -> str:
|
||||
return hashlib.sha256(raw_str.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _trunc(s: str, n: int = 40) -> str:
    """Return ``s`` as text, shortened to at most ``n`` characters.

    Args:
        s: Value to render; it is passed through ``str()`` first.
        n: Maximum width of the result.

    Returns:
        The text unchanged when it already fits, otherwise its first
        ``n - 1`` characters followed by a single "…". A non-positive
        ``n`` yields an empty string.
    """
    s = str(s)
    # Guard the degenerate width: with n <= 0 the slice below would use a
    # negative end index and return nearly the whole string plus "…".
    if n <= 0:
        return ""
    if len(s) <= n:
        return s
    return s[: n - 1] + "…"
|
||||
|
||||
|
||||
def _print_fio_table(transactions: list[dict], statuses: list[str]) -> None:
    """Print fetched transactions as an aligned plain-text table on stdout.

    Args:
        transactions: Transaction dicts; the keys read are ``date``,
            ``amount``, ``sender``, ``vs``, ``message`` and ``bank_id``.
        statuses: One status label per transaction, in the same order.
            Rows are paired with ``zip``, so surplus items on either side
            are ignored.
    """
    headers = ["DATE", "AMOUNT", "SENDER", "VS", "MESSAGE", "BANKID", "STATUS"]

    def cell(tx: dict, key: str) -> str:
        # A missing key and an explicit None both render as an empty cell
        # (not the text "None"). Whitespace runs are collapsed so embedded
        # newlines or tabs cannot split a row or break column alignment.
        value = tx.get(key)
        return "" if value is None else " ".join(str(value).split())

    rows = [
        [
            cell(tx, "date"),
            # A None amount is shown as 0.00, same as a missing key,
            # instead of raising TypeError inside float().
            f"{float(tx.get('amount') or 0):.2f}",
            cell(tx, "sender"),
            cell(tx, "vs"),
            _trunc(cell(tx, "message")),
            cell(tx, "bank_id"),
            str(status),
        ]
        for tx, status in zip(transactions, statuses)
    ]
    # Each column is as wide as its header or its longest cell.
    widths = [
        max(len(header), max((len(row[i]) for row in rows), default=0))
        for i, header in enumerate(headers)
    ]
    sep = " "
    print(sep.join(h.ljust(w) for h, w in zip(headers, widths)))
    for row in rows:
        print(sep.join(value.ljust(w) for value, w in zip(row, widths)))
|
||||
|
||||
|
||||
def sort_sheet_by_date(service, spreadsheet_id):
|
||||
"""Sort the sheet by the Date column (Column B)."""
|
||||
# Get the sheet ID (gid) of the first sheet
|
||||
@@ -104,12 +133,21 @@ def sort_sheet_by_date(service, spreadsheet_id):
|
||||
print("Sheet sorted by date.")
|
||||
|
||||
|
||||
def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None, date_from_str: str = None, date_to_str: str = None, sort_by_date: bool = False):
|
||||
def sync_to_sheets(
|
||||
spreadsheet_id: str,
|
||||
credentials_path: str,
|
||||
days: int = None,
|
||||
date_from_str: str = None,
|
||||
date_to_str: str = None,
|
||||
sort_by_date: bool = False,
|
||||
dry_run: bool = False,
|
||||
print_fio_table: bool = False,
|
||||
):
|
||||
print(f"Connecting to Google Sheets using {credentials_path}...")
|
||||
service = get_sheets_service(credentials_path)
|
||||
sheet = service.spreadsheets()
|
||||
|
||||
# 1. Fetch existing IDs from Column G (last column in A-G range)
|
||||
# 1. Read existing sync IDs from Column K
|
||||
print(f"Reading existing sync IDs from sheet...")
|
||||
try:
|
||||
result = sheet.values().get(
|
||||
@@ -117,19 +155,22 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
|
||||
range="A1:K" # Include header and all columns to check Sync ID
|
||||
).execute()
|
||||
values = result.get("values", [])
|
||||
|
||||
|
||||
# Check and insert labels if missing
|
||||
if not values or values[0] != COLUMN_LABELS:
|
||||
print("Inserting column labels...")
|
||||
sheet.values().update(
|
||||
spreadsheetId=spreadsheet_id,
|
||||
range="A1",
|
||||
valueInputOption="USER_ENTERED",
|
||||
body={"values": [COLUMN_LABELS]}
|
||||
).execute()
|
||||
if dry_run:
|
||||
print("Dry run: would write header row")
|
||||
else:
|
||||
print("Inserting column labels...")
|
||||
sheet.values().update(
|
||||
spreadsheetId=spreadsheet_id,
|
||||
range="A1",
|
||||
valueInputOption="USER_ENTERED",
|
||||
body={"values": [COLUMN_LABELS]}
|
||||
).execute()
|
||||
existing_ids = set()
|
||||
else:
|
||||
# Sync ID is now the last column (index 10)
|
||||
# Sync ID is the last column (index 10)
|
||||
existing_ids = {row[10] for row in values[1:] if len(row) > 10}
|
||||
except Exception as e:
|
||||
print(f"Error reading sheet (maybe empty?): {e}")
|
||||
@@ -150,8 +191,12 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
|
||||
transactions = fetch_transactions(df_str, dt_str)
|
||||
print(f"Found {len(transactions)} transactions.")
|
||||
|
||||
# 3. Filter for new transactions
|
||||
if dry_run:
|
||||
print(f"Dry run: window {df_str} to {dt_str}, fetched {len(transactions)} transaction(s) from Fio")
|
||||
|
||||
# 3. Determine NEW/DUP for each transaction
|
||||
new_rows = []
|
||||
tx_statuses = []
|
||||
for tx in transactions:
|
||||
sync_id = generate_sync_id(tx)
|
||||
if sync_id not in existing_ids:
|
||||
@@ -169,24 +214,48 @@ def sync_to_sheets(spreadsheet_id: str, credentials_path: str, days: int = None,
|
||||
tx.get("bank_id", ""),
|
||||
sync_id,
|
||||
])
|
||||
tx_statuses.append("NEW")
|
||||
else:
|
||||
tx_statuses.append("DUP")
|
||||
|
||||
# 4. Print table (before early-return so all transactions are shown including DUPs)
|
||||
if print_fio_table and transactions:
|
||||
_print_fio_table(transactions, tx_statuses)
|
||||
|
||||
if not new_rows:
|
||||
print("No new transactions to sync.")
|
||||
if dry_run:
|
||||
print("Dry run: would sync 0 new transaction(s).")
|
||||
else:
|
||||
print("No new transactions to sync.")
|
||||
return
|
||||
|
||||
# 4. Append to sheet
|
||||
print(f"Appending {len(new_rows)} new transactions to the sheet...")
|
||||
body = {"values": new_rows}
|
||||
sheet.values().append(
|
||||
spreadsheetId=spreadsheet_id,
|
||||
range="A2", # Appends to the end of the sheet
|
||||
valueInputOption="USER_ENTERED",
|
||||
body=body
|
||||
).execute()
|
||||
print("Sync completed successfully.")
|
||||
|
||||
if sort_by_date:
|
||||
sort_sheet_by_date(service, spreadsheet_id)
|
||||
# 5. Append to sheet or print dry-run would-write lines
|
||||
if dry_run:
|
||||
for tx, status in zip(transactions, tx_statuses):
|
||||
if status == "NEW":
|
||||
print(
|
||||
f"Dry run: would append"
|
||||
f" date={tx.get('date', '')}"
|
||||
f" amount={tx.get('amount', '')}"
|
||||
f" sender={tx.get('sender', '')}"
|
||||
f" vs={tx.get('vs', '')}"
|
||||
f" message={tx.get('message', '')}"
|
||||
)
|
||||
if sort_by_date:
|
||||
print("Dry run: would sort by date")
|
||||
print(f"Dry run: would sync {len(new_rows)} new transaction(s).")
|
||||
else:
|
||||
print(f"Appending {len(new_rows)} new transactions to the sheet...")
|
||||
body = {"values": new_rows}
|
||||
sheet.values().append(
|
||||
spreadsheetId=spreadsheet_id,
|
||||
range="A2", # Appends to the end of the sheet
|
||||
valueInputOption="USER_ENTERED",
|
||||
body=body
|
||||
).execute()
|
||||
print("Sync completed successfully.")
|
||||
if sort_by_date:
|
||||
sort_sheet_by_date(service, spreadsheet_id)
|
||||
|
||||
|
||||
def main():
|
||||
@@ -197,16 +266,20 @@ def main():
|
||||
parser.add_argument("--from", dest="date_from", help="Start date YYYY-MM-DD")
|
||||
parser.add_argument("--to", dest="date_to", help="End date YYYY-MM-DD")
|
||||
parser.add_argument("--sort-by-date", action="store_true", help="Sort the sheet by date after sync")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Fetch and dedup without writing to the sheet")
|
||||
parser.add_argument("--print-fio-table", action="store_true", help="Print aligned table of all fetched transactions with NEW/DUP status (use with --dry-run)")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
sync_to_sheets(
|
||||
spreadsheet_id=args.sheet_id,
|
||||
spreadsheet_id=args.sheet_id,
|
||||
credentials_path=args.credentials,
|
||||
days=args.days,
|
||||
date_from_str=args.date_from,
|
||||
date_to_str=args.date_to,
|
||||
sort_by_date=args.sort_by_date
|
||||
sort_by_date=args.sort_by_date,
|
||||
dry_run=args.dry_run,
|
||||
print_fio_table=args.print_fio_table,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Sync failed: {e}")
|
||||
|
||||
Reference in New Issue
Block a user