#!/usr/bin/env python3
"""Match Fio bank payments against expected attendance fees."""

import argparse
import json
import os
import re
import urllib.request
from datetime import datetime, timedelta
from html.parser import HTMLParser

from attendance import get_members_with_fees
from czech_utils import normalize, parse_month_references

# ---------------------------------------------------------------------------
# Transaction fetching
# ---------------------------------------------------------------------------


class _FioTableParser(HTMLParser):
    """Collect the body rows of the second ``<table class="table">`` on the page.

    Tables are counted only when their ``class`` attribute contains the word
    ``table``; the second such table is treated as the transaction listing.
    Rows inside ``<thead>`` are skipped, and rows without any cell are dropped.

    Columns: Datum | Částka | Typ | Název protiúčtu | Zpráva pro příjemce | KS | VS | SS | Poznámka
    Indices: 0       1        2     3                 4                      5    6    7    8
    """

    # 1-based position of the wanted table among tables carrying the
    # "table" CSS class.
    _TARGET_TABLE_INDEX = 2

    def __init__(self):
        super().__init__()
        self._table_count = 0
        self._in_target_table = False
        self._in_thead = False
        self._in_row = False
        self._in_cell = False
        self._current_row: list[str] = []
        self._rows: list[list[str]] = []
        self._cell_text = ""

    def _finish_cell(self) -> None:
        """Store the text of the open cell, if any, in the current row."""
        if self._in_cell:
            self._in_cell = False
            self._current_row.append(self._cell_text.strip())

    def _finish_row(self) -> None:
        """Close the open cell and row, keeping the row if it has cells."""
        self._finish_cell()
        if self._in_row:
            self._in_row = False
            if self._current_row:
                self._rows.append(self._current_row)

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Track entry into the target table, its header, rows and cells."""
        if tag == "table":
            # A valueless attribute (``<table class>``) is reported as None,
            # so fall back to an empty string before splitting.
            css_classes = (dict(attrs).get("class") or "").split()
            if "table" in css_classes:
                self._table_count += 1
                if self._table_count == self._TARGET_TABLE_INDEX:
                    self._in_target_table = True

        if not self._in_target_table:
            return

        if tag == "thead":
            self._in_thead = True
        elif tag == "tr" and not self._in_thead:
            # HTML allows </tr> to be omitted: a new row implicitly ends the
            # previous one instead of discarding its cells.
            self._finish_row()
            self._in_row = True
            self._current_row = []
        elif tag in ("td", "th") and self._in_row:
            # Likewise a new cell implicitly ends the previous cell.
            self._finish_cell()
            self._in_cell = True
            self._cell_text = ""

    def handle_endtag(self, tag: str) -> None:
        """Close cells, rows, the header section and the target table."""
        if tag in ("td", "th"):
            self._finish_cell()
        elif tag == "thead":
            self._in_thead = False
        elif tag == "tr":
            self._finish_row()
        elif tag == "table" and self._in_target_table:
            # Flush anything still open so a missing </tr> cannot lose the
            # last row or leak state into markup after the table.
            self._finish_row()
            self._in_thead = False
            self._in_target_table = False

    def handle_data(self, data: str) -> None:
        """Accumulate text that appears inside an open cell."""
        if self._in_cell:
            self._cell_text += data

    def get_rows(self) -> list[list[str]]:
        """Return the collected rows, each a list of stripped cell texts."""
        return self._rows


# Fio transparent table column indices
_COL_DATE = 0
_COL_AMOUNT = 1
_COL_SENDER = 3
_COL_MESSAGE = 4 _COL_KS = 5 _COL_VS = 6 _COL_SS = 7 _COL_NOTE = 8 def _parse_czech_amount(s: str) -> float | None: """Parse '1 500,00 CZK' to float.""" s = s.replace("\xa0", "").replace(" ", "").replace(",", ".") s = re.sub(r"[A-Za-z]+", "", s).strip() try: return float(s) except ValueError: return None def _parse_czech_date(s: str) -> str | None: """Parse 'DD.MM.YYYY' to 'YYYY-MM-DD'.""" s = s.strip() for fmt in ("%d.%m.%Y", "%d/%m/%Y"): try: return datetime.strptime(s, fmt).strftime("%Y-%m-%d") except ValueError: continue return None def fetch_transactions_transparent( date_from: str, date_to: str ) -> list[dict]: """Fetch transactions from Fio transparent account HTML page. Args: date_from: D.M.YYYY format date_to: D.M.YYYY format """ url = ( f"https://ib.fio.cz/ib/transparent?a=2800359168" f"&f={date_from}&t={date_to}" ) req = urllib.request.Request(url) with urllib.request.urlopen(req) as resp: html = resp.read().decode("utf-8") parser = _FioTableParser() parser.feed(html) rows = parser.get_rows() transactions = [] for row in rows: if len(row) < 5: continue def col(i): return row[i].strip() if i < len(row) else "" date_str = _parse_czech_date(col(_COL_DATE)) amount = _parse_czech_amount(col(_COL_AMOUNT)) if date_str is None or amount is None or amount <= 0: continue transactions.append({ "date": date_str, "amount": amount, "sender": col(_COL_SENDER), "message": col(_COL_MESSAGE), "vs": col(_COL_VS), }) return transactions def fetch_transactions_api( token: str, date_from: str, date_to: str ) -> list[dict]: """Fetch transactions via Fio REST API (JSON). 
Args: token: Fio API token date_from: YYYY-MM-DD format date_to: YYYY-MM-DD format """ url = ( f"https://fioapi.fio.cz/v1/rest/periods/{token}" f"/{date_from}/{date_to}/transactions.json" ) req = urllib.request.Request(url) with urllib.request.urlopen(req) as resp: data = json.loads(resp.read().decode("utf-8")) transactions = [] tx_list = data.get("accountStatement", {}).get("transactionList", {}) for tx in (tx_list.get("transaction") or []): # Each field is {"value": ..., "name": ..., "id": ...} or null def val(col_id): col = tx.get(f"column{col_id}") return col["value"] if col else "" amount = float(val(1) or 0) if amount <= 0: continue # Skip outgoing date_raw = val(0) or "" # API returns date as "YYYY-MM-DD+HHMM" or ISO format date_str = date_raw[:10] if date_raw else "" transactions.append({ "date": date_str, "amount": amount, "sender": str(val(10) or ""), # column10 = sender name "message": str(val(16) or ""), # column16 = message for recipient "vs": str(val(5) or ""), # column5 = VS "user_id": str(val(7) or ""), # column7 = user identification "sender_account": str(val(2) or ""), # column2 = sender account }) return transactions def fetch_transactions(date_from: str, date_to: str) -> list[dict]: """Fetch transactions, using API if token available, else transparent page.""" token = os.environ.get("FIO_API_TOKEN", "").strip() if token: return fetch_transactions_api(token, date_from, date_to) # Convert YYYY-MM-DD to DD.MM.YYYY for the transparent page URL from_dt = datetime.strptime(date_from, "%Y-%m-%d") to_dt = datetime.strptime(date_to, "%Y-%m-%d") return fetch_transactions_transparent( from_dt.strftime("%-d.%-m.%Y"), to_dt.strftime("%-d.%-m.%Y"), ) # --------------------------------------------------------------------------- # Name matching # --------------------------------------------------------------------------- def _build_name_variants(name: str) -> list[str]: """Build searchable name variants from a member name. E.g. 
'František Vrbík (Štrúdl)' → ['frantisek vrbik', 'strudl', 'vrbik'] """ # Extract nickname from parentheses nickname_match = re.search(r"\(([^)]+)\)", name) nickname = nickname_match.group(1) if nickname_match else "" # Base name without nickname base = re.sub(r"\s*\([^)]*\)\s*", " ", name).strip() normalized_base = normalize(base) normalized_nick = normalize(nickname) variants = [normalized_base] if normalized_nick: variants.append(normalized_nick) # Also add last name alone (for matching in messages) parts = normalized_base.split() if len(parts) >= 2: variants.append(parts[-1]) # last name variants.append(parts[0]) # first name return [v for v in variants if len(v) >= 3] def match_members( text: str, member_names: list[str] ) -> list[tuple[str, str]]: """Find members mentioned in text. Returns list of (member_name, confidence) where confidence is 'auto' or 'review'. """ normalized_text = normalize(text) matches = [] for name in member_names: variants = _build_name_variants(name) # Full name match = high confidence full_name = variants[0] if variants else "" if full_name and full_name in normalized_text: matches.append((name, "auto")) continue # Last name + first name both present = high confidence parts = full_name.split() if len(parts) >= 2: if parts[0] in normalized_text and parts[-1] in normalized_text: matches.append((name, "auto")) continue # Nickname match = high confidence if len(variants) > 1 and variants[1] in normalized_text: matches.append((name, "auto")) continue # Last name only = lower confidence, but skip very common Czech surnames _COMMON_SURNAMES = {"novak", "novakova", "prach"} if ( len(parts) >= 2 and len(parts[-1]) >= 4 and parts[-1] not in _COMMON_SURNAMES and parts[-1] in normalized_text ): matches.append((name, "review")) continue return matches # --------------------------------------------------------------------------- # Reconciliation # --------------------------------------------------------------------------- def reconcile( members: 
list[tuple[str, str, dict[str, int]]], sorted_months: list[str], transactions: list[dict], ) -> dict: """Match transactions to members and months. Returns a dict with: - 'members': {name: {'tier': str, 'months': {YYYY-MM: {'expected': int, 'paid': int, 'transactions': list}}}} - 'unmatched': list of transactions that couldn't be matched - 'credits': {name: int} — excess payments tracked as credit """ member_names = [name for name, _, _ in members] member_tiers = {name: tier for name, tier, _ in members} member_fees = {name: {m: fee for m, (fee, _) in fees.items()} for name, _, fees in members} # Initialize ledger ledger: dict[str, dict[str, dict]] = {} for name in member_names: ledger[name] = {} for m in sorted_months: ledger[name][m] = { "expected": member_fees[name].get(m, 0), "paid": 0, "transactions": [], } unmatched = [] credits: dict[str, int] = {} for tx in transactions: # Combine sender + message for searching search_text = f"{tx['sender']} {tx['message']} {tx.get('user_id', '')}" matched_members = match_members(search_text, member_names) matched_months = parse_month_references( tx["message"] + " " + tx.get("user_id", "") ) if not matched_members: # Try matching sender name alone with more lenient matching matched_members = match_members(tx["sender"], member_names) if not matched_members: unmatched.append(tx) continue if not matched_months: # If no month specified, try to infer from payment date tx_date = tx["date"] if tx_date: try: dt = datetime.strptime(tx_date, "%Y-%m-%d") # Assume payment is for the current month matched_months = [dt.strftime("%Y-%m")] except ValueError: pass if not matched_months: unmatched.append(tx) continue # Allocate payment across matched members and months num_allocations = len(matched_members) * len(matched_months) per_allocation = tx["amount"] / num_allocations if num_allocations > 0 else 0 for member_name, confidence in matched_members: for month_key in matched_months: entry = { "amount": per_allocation, "date": tx["date"], 
"sender": tx["sender"], "message": tx["message"], "confidence": confidence, } if month_key in ledger.get(member_name, {}): ledger[member_name][month_key]["paid"] += per_allocation ledger[member_name][month_key]["transactions"].append(entry) else: # Future month — track as credit credits[member_name] = credits.get(member_name, 0) + int(per_allocation) return { "members": { name: { "tier": member_tiers[name], "months": ledger[name], } for name in member_names }, "unmatched": unmatched, "credits": credits, } # --------------------------------------------------------------------------- # Report output # --------------------------------------------------------------------------- def print_report(result: dict, sorted_months: list[str]): month_labels = { m: datetime.strptime(m, "%Y-%m").strftime("%b %Y") for m in sorted_months } # --- Per-member breakdown (adults only) --- print("=" * 80) print("PAYMENT RECONCILIATION REPORT") print("=" * 80) adults = { name: data for name, data in result["members"].items() if data["tier"] == "A" } total_expected = 0 total_paid = 0 # Summary table name_width = max((len(n) for n in adults), default=20) header = f"{'Member':<{name_width}}" for m in sorted_months: header += f" | {month_labels[m]:>10}" header += " | {'Balance':>10}" print(f"\n{'Member':<{name_width}}", end="") for m in sorted_months: print(f" | {month_labels[m]:>10}", end="") print(f" | {'Balance':>10}") print("-" * (name_width + (len(sorted_months) + 1) * 13)) for name in sorted(adults.keys()): data = adults[name] line = f"{name:<{name_width}}" member_balance = 0 for m in sorted_months: mdata = data["months"].get(m, {"expected": 0, "paid": 0}) expected = mdata["expected"] paid = int(mdata["paid"]) total_expected += expected total_paid += paid if expected == 0 and paid == 0: cell = "-" elif paid >= expected and expected > 0: cell = "OK" elif paid > 0: cell = f"{paid}/{expected}" else: cell = f"UNPAID {expected}" member_balance += paid - expected line += f" | {cell:>10}" 
balance_str = f"{member_balance:+d}" if member_balance != 0 else "0" line += f" | {balance_str:>10}" print(line) print("-" * (name_width + (len(sorted_months) + 1) * 13)) print(f"{'TOTAL':<{name_width}}", end="") for _ in sorted_months: print(f" | {'':>10}", end="") balance = total_paid - total_expected print(f" | {f'Expected: {total_expected}, Paid: {int(total_paid)}, Balance: {balance:+d}'}") # --- Credits --- if result["credits"]: print(f"\n{'CREDITS (advance payments for future months)':}") for name, amount in sorted(result["credits"].items()): print(f" {name}: {amount} CZK") # --- Unmatched transactions --- if result["unmatched"]: print(f"\n{'UNMATCHED TRANSACTIONS (need manual review)':}") print(f" {'Date':<12} {'Amount':>10} {'Sender':<30} {'Message'}") print(f" {'-'*12} {'-'*10} {'-'*30} {'-'*30}") for tx in result["unmatched"]: print( f" {tx['date']:<12} {tx['amount']:>10.0f} " f"{tx['sender']:<30} {tx['message']}" ) # --- Detailed matched transactions --- print(f"\n{'MATCHED TRANSACTION DETAILS':}") for name in sorted(adults.keys()): data = adults[name] has_payments = any( data["months"].get(m, {}).get("transactions") for m in sorted_months ) if not has_payments: continue print(f"\n {name}:") for m in sorted_months: mdata = data["months"].get(m, {}) for tx in mdata.get("transactions", []): conf = " [REVIEW]" if tx["confidence"] == "review" else "" print( f" {month_labels[m]}: {tx['amount']:.0f} CZK " f"from {tx['sender']} — \"{tx['message']}\"{conf}" ) # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser( description="Match bank payments against expected attendance fees." 
) parser.add_argument( "--from", dest="date_from", default="2025-12-01", help="Start date YYYY-MM-DD (default: 2025-12-01)", ) parser.add_argument( "--to", dest="date_to", default=datetime.now().strftime("%Y-%m-%d"), help="End date YYYY-MM-DD (default: today)", ) args = parser.parse_args() print(f"Fetching attendance data...") members, sorted_months = get_members_with_fees() if not members: print("No attendance data found.") return print(f"Fetching transactions from {args.date_from} to {args.date_to}...") transactions = fetch_transactions(args.date_from, args.date_to) print(f"Found {len(transactions)} incoming transactions.\n") result = reconcile(members, sorted_months, transactions) print_report(result, sorted_months) if __name__ == "__main__": main()