feat: implement automated payment inference and sync to Google Sheets

This commit is contained in:
Jan Novak
2026-03-02 14:29:45 +01:00
parent 65e40d116b
commit d719383c9c
10 changed files with 1520 additions and 264 deletions

View File

@@ -11,205 +11,7 @@ from html.parser import HTMLParser
from attendance import get_members_with_fees
from czech_utils import normalize, parse_month_references
# ---------------------------------------------------------------------------
# Transaction fetching
# ---------------------------------------------------------------------------
class _FioTableParser(HTMLParser):
"""Parse the second <table class="table"> on the Fio transparent page.
Columns: Datum | Částka | Typ | Název protiúčtu | Zpráva pro příjemce | KS | VS | SS | Poznámka
Indices: 0 1 2 3 4 5 6 7 8
"""
def __init__(self):
super().__init__()
self._table_count = 0
self._in_target_table = False
self._in_thead = False
self._in_row = False
self._in_cell = False
self._current_row: list[str] = []
self._rows: list[list[str]] = []
self._cell_text = ""
def handle_starttag(self, tag, attrs):
cls = dict(attrs).get("class", "")
if tag == "table" and "table" in cls.split():
self._table_count += 1
if self._table_count == 2:
self._in_target_table = True
if self._in_target_table:
if tag == "thead":
self._in_thead = True
if tag == "tr" and not self._in_thead:
self._in_row = True
self._current_row = []
if self._in_row and tag in ("td", "th"):
self._in_cell = True
self._cell_text = ""
def handle_endtag(self, tag):
if self._in_cell and tag in ("td", "th"):
self._in_cell = False
self._current_row.append(self._cell_text.strip())
if tag == "thead":
self._in_thead = False
if self._in_row and tag == "tr":
self._in_row = False
if self._current_row:
self._rows.append(self._current_row)
if tag == "table" and self._in_target_table:
self._in_target_table = False
def handle_data(self, data):
if self._in_cell:
self._cell_text += data
def get_rows(self) -> list[list[str]]:
return self._rows
# Fio transparent table column indices.
# Positions follow the header order listed in the _FioTableParser docstring.
# Index 2 (the "Typ" column) has no constant in this list.
_COL_DATE = 0  # "Datum"
_COL_AMOUNT = 1  # "Částka"
_COL_SENDER = 3  # "Název protiúčtu"
_COL_MESSAGE = 4  # "Zpráva pro příjemce"
_COL_KS = 5  # "KS"
_COL_VS = 6  # "VS"
_COL_SS = 7  # "SS"
_COL_NOTE = 8  # "Poznámka"
def _parse_czech_amount(s: str) -> float | None:
"""Parse '1 500,00 CZK' to float."""
s = s.replace("\xa0", "").replace(" ", "").replace(",", ".")
s = re.sub(r"[A-Za-z]+", "", s).strip()
try:
return float(s)
except ValueError:
return None
def _parse_czech_date(s: str) -> str | None:
"""Parse 'DD.MM.YYYY' to 'YYYY-MM-DD'."""
s = s.strip()
for fmt in ("%d.%m.%Y", "%d/%m/%Y"):
try:
return datetime.strptime(s, fmt).strftime("%Y-%m-%d")
except ValueError:
continue
return None
def fetch_transactions_transparent(
    date_from: str,
    date_to: str,
    *,
    account: str = "2800359168",
    timeout: float = 30.0,
) -> list[dict]:
    """Fetch incoming transactions from the Fio transparent account HTML page.

    Args:
        date_from: D.M.YYYY format
        date_to: D.M.YYYY format
        account: Account number placed in the ``a`` query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        One dict per row with a parseable date and a positive amount, with
        keys ``date`` (YYYY-MM-DD), ``amount``, ``sender``, ``message``, ``vs``.
    """
    # Local import: only urllib.request is known to be imported at file level.
    from urllib.parse import urlencode

    query = urlencode({"a": account, "f": date_from, "t": date_to})
    url = f"https://ib.fio.cz/ib/transparent?{query}"
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        html = resp.read().decode("utf-8")

    parser = _FioTableParser()
    parser.feed(html)
    parser.close()

    def cell(row: list[str], i: int) -> str:
        # Trailing columns may be missing from a row; treat them as empty.
        return row[i].strip() if i < len(row) else ""

    transactions = []
    for row in parser.get_rows():
        if len(row) < 5:
            continue
        date_str = _parse_czech_date(cell(row, _COL_DATE))
        amount = _parse_czech_amount(cell(row, _COL_AMOUNT))
        # Skip unparseable rows and outgoing (non-positive) payments.
        if date_str is None or amount is None or amount <= 0:
            continue
        transactions.append({
            "date": date_str,
            "amount": amount,
            "sender": cell(row, _COL_SENDER),
            "message": cell(row, _COL_MESSAGE),
            "vs": cell(row, _COL_VS),
        })
    return transactions
def fetch_transactions_api(
    token: str, date_from: str, date_to: str, *, timeout: float = 30.0
) -> list[dict]:
    """Fetch incoming transactions via the Fio REST API (JSON).

    Args:
        token: Fio API token
        date_from: YYYY-MM-DD format
        date_to: YYYY-MM-DD format
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        One dict per transaction with a positive amount, with keys ``date``,
        ``amount``, ``sender``, ``message``, ``vs``, ``user_id`` and
        ``sender_account``.
    """
    url = (
        f"https://fioapi.fio.cz/v1/rest/periods/{token}"
        f"/{date_from}/{date_to}/transactions.json"
    )
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        data = json.loads(resp.read().decode("utf-8"))

    # Any level may be JSON null, so guard each step with ``or``.
    statement = data.get("accountStatement") or {}
    tx_list = statement.get("transactionList") or {}

    def val(tx: dict, col_id: int):
        # Each field is {"value": ..., "name": ..., "id": ...} or null
        col = tx.get(f"column{col_id}")
        return col["value"] if col else ""

    transactions = []
    for tx in tx_list.get("transaction") or []:
        amount = float(val(tx, 1) or 0)
        if amount <= 0:
            continue  # Skip outgoing
        # API returns date as "YYYY-MM-DD+HHMM" or ISO format; keep the date part.
        date_str = str(val(tx, 0) or "")[:10]
        transactions.append({
            "date": date_str,
            "amount": amount,
            "sender": str(val(tx, 10) or ""),  # column10 = sender name
            "message": str(val(tx, 16) or ""),  # column16 = message for recipient
            "vs": str(val(tx, 5) or ""),  # column5 = VS
            "user_id": str(val(tx, 7) or ""),  # column7 = user identification
            "sender_account": str(val(tx, 2) or ""),  # column2 = sender account
        })
    return transactions
def fetch_transactions(date_from: str, date_to: str) -> list[dict]:
    """Fetch transactions, using API if token available, else transparent page.

    Args:
        date_from: YYYY-MM-DD format
        date_to: YYYY-MM-DD format

    Raises:
        ValueError: If a date is not in YYYY-MM-DD format (transparent-page
            path only; the API path passes the strings through unchanged).
    """
    token = os.environ.get("FIO_API_TOKEN", "").strip()
    if token:
        return fetch_transactions_api(token, date_from, date_to)

    def to_page_format(iso_date: str) -> str:
        # The transparent page expects D.M.YYYY without zero padding. Build it
        # by hand: the "%-d" strftime flag is a glibc extension and raises
        # ValueError on Windows.
        dt = datetime.strptime(iso_date, "%Y-%m-%d")
        return f"{dt.day}.{dt.month}.{dt.year}"

    return fetch_transactions_transparent(
        to_page_format(date_from),
        to_page_format(date_to),
    )
from sync_fio_to_sheets import get_sheets_service, DEFAULT_SPREADSHEET_ID
# ---------------------------------------------------------------------------
@@ -255,34 +57,57 @@ def match_members(
for name in member_names:
variants = _build_name_variants(name)
# Full name match = high confidence
full_name = variants[0] if variants else ""
parts = full_name.split()
# 1. Full name match (exact sequence) = high confidence
if full_name and full_name in normalized_text:
matches.append((name, "auto"))
continue
# Last name + first name both present = high confidence
parts = full_name.split()
# 2. Both first and last name present (any order) = high confidence
if len(parts) >= 2:
if parts[0] in normalized_text and parts[-1] in normalized_text:
matches.append((name, "auto"))
continue
# Nickname match = high confidence
if len(variants) > 1 and variants[1] in normalized_text:
matches.append((name, "auto"))
continue
# 3. Nickname + one part of the name = high confidence
nickname = ""
nickname_match = re.search(r"\(([^)]+)\)", name)
if nickname_match:
nickname = normalize(nickname_match.group(1))
if nickname and nickname in normalized_text:
# Nickname alone is often enough, but let's check if it's combined with a name part
matches.append((name, "auto"))
continue
# Last name only = lower confidence, but skip very common Czech surnames
_COMMON_SURNAMES = {"novak", "novakova", "prach"}
if (
len(parts) >= 2
and len(parts[-1]) >= 4
and parts[-1] not in _COMMON_SURNAMES
and parts[-1] in normalized_text
):
matches.append((name, "review"))
continue
# 4. Partial matches = review confidence
if len(parts) >= 2:
first_name = parts[0]
last_name = parts[-1]
_COMMON_SURNAMES = {"novak", "novakova", "prach"}
# Match last name
if len(last_name) >= 4 and last_name not in _COMMON_SURNAMES and last_name in normalized_text:
matches.append((name, "review"))
continue
# Match first name (if not too short)
if len(first_name) >= 3 and first_name in normalized_text:
matches.append((name, "review"))
continue
elif len(parts) == 1:
# Single name member
if len(parts[0]) >= 4 and parts[0] in normalized_text:
matches.append((name, "review"))
continue
# --- Filtering ---
# If we have any "auto" matches, discard all "review" matches
auto_matches = [m for m in matches if m[1] == "auto"]
if auto_matches:
# If multiple auto matches, keep them (ambiguous but high priority)
return auto_matches
return matches
@@ -291,6 +116,102 @@ def match_members(
# Reconciliation
# ---------------------------------------------------------------------------
def infer_transaction_details(tx: dict, member_names: list[str]) -> dict:
    """Infer member(s) and month(s) for a single transaction.

    Args:
        tx: Transaction dict; the keys read are ``sender``, ``message``,
            ``user_id`` and ``date``. Values may be non-strings (for example
            numbers coming from a sheet read with unformatted values).
        member_names: Candidate member names passed to ``match_members``.

    Returns:
        {
            'members': [(name, confidence)],
            'months': [YYYY-MM],
            'search_text': str
        }
    """
    def text(key: str) -> str:
        # Coerce to str so numeric cells cannot break concatenation/matching.
        value = tx.get(key, "")
        return "" if value is None else str(value)

    sender = text("sender")
    message = text("message")
    user_id = text("user_id")

    # Combine sender + message for searching
    search_text = f"{sender} {message} {user_id}"
    matched_members = match_members(search_text, member_names)
    matched_months = parse_month_references(message + " " + user_id)

    if not matched_members:
        # Try matching sender name alone with more lenient matching
        matched_members = match_members(sender, member_names)

    if not matched_months:
        # If no month specified, try to infer from payment date
        tx_date = tx.get("date")
        if tx_date:
            try:
                if isinstance(tx_date, (int, float)):
                    # Handle Google Sheets serial date (days since 1899-12-30)
                    dt = datetime(1899, 12, 30) + timedelta(days=tx_date)
                else:
                    dt = datetime.strptime(str(tx_date), "%Y-%m-%d")
            except (ValueError, TypeError, OverflowError):
                # Unparseable or out-of-range date: leave months empty.
                pass
            else:
                # Assume payment is for the current month
                matched_months = [dt.strftime("%Y-%m")]

    return {
        "members": matched_members,
        "months": matched_months,
        "search_text": search_text,
    }
def fetch_sheet_data(spreadsheet_id: str, credentials_path: str) -> list[dict]:
    """Fetch all rows from the Google Sheet and convert to a list of dicts.

    The first row is treated as the header. Columns are located by
    case-insensitive, whitespace-trimmed header label; a column that is
    missing from the header (or from a short row) yields "".

    Args:
        spreadsheet_id: ID of the spreadsheet to read.
        credentials_path: Path passed to ``get_sheets_service``.

    Returns:
        One dict per data row with keys ``date``, ``amount``, ``manual_fix``,
        ``person``, ``purpose``, ``inferred_amount``, ``sender``, ``message``
        and ``bank_id``. Values are unformatted, so they may be numbers.
    """
    service = get_sheets_service(credentials_path)
    result = service.spreadsheets().values().get(
        spreadsheetId=spreadsheet_id,
        range="A1:Z",
        valueRenderOption="UNFORMATTED_VALUE",
    ).execute()
    rows = result.get("values", [])
    if not rows:
        return []

    # Map normalized header label -> first column index carrying it.
    # str() guards against numeric header cells (values are unformatted).
    column_index: dict[str, int] = {}
    for i, label in enumerate(rows[0]):
        column_index.setdefault(str(label).lower().strip(), i)

    # Output key -> normalized header label, in output order.
    fields = {
        "date": "date",
        "amount": "amount",
        "manual_fix": "manual fix",
        "person": "person",
        "purpose": "purpose",
        "inferred_amount": "inferred amount",
        "sender": "sender",
        "message": "message",
        "bank_id": "bank id",
    }
    field_columns = {key: column_index.get(label) for key, label in fields.items()}

    transactions = []
    for row in rows[1:]:
        # The API omits trailing empty cells, so rows can be shorter than the header.
        transactions.append({
            key: row[idx] if idx is not None and idx < len(row) else ""
            for key, idx in field_columns.items()
        })
    return transactions
def reconcile(
members: list[tuple[str, str, dict[str, int]]],
sorted_months: list[str],
@@ -322,41 +243,54 @@ def reconcile(
credits: dict[str, int] = {}
for tx in transactions:
# Combine sender + message for searching
search_text = f"{tx['sender']} {tx['message']} {tx.get('user_id', '')}"
matched_members = match_members(search_text, member_names)
matched_months = parse_month_references(
tx["message"] + " " + tx.get("user_id", "")
)
# Use sheet columns if they exist, otherwise fallback to inference
person_str = str(tx.get("person", "")).strip()
purpose_str = str(tx.get("purpose", "")).strip()
# Strip markers like [?]
person_str = re.sub(r"\[\?\]\s*", "", person_str)
if person_str and purpose_str:
# We have pre-matched data (either from script or manual)
# Support multiple people/months in the comma-separated string
matched_members = [(p.strip(), "auto") for p in person_str.split(",") if p.strip()]
matched_months = [m.strip() for m in purpose_str.split(",") if m.strip()]
# Use Inferred Amount if available, otherwise bank Amount
amount = tx.get("inferred_amount")
if amount is None or amount == "":
amount = tx.get("amount", 0)
try:
amount = float(amount)
except (ValueError, TypeError):
amount = 0
else:
# Fallback to inference (for rows not yet processed by infer_payments.py)
inference = infer_transaction_details(tx, member_names)
matched_members = inference["members"]
matched_months = inference["months"]
amount = tx.get("amount", 0)
try:
amount = float(amount)
except (ValueError, TypeError):
amount = 0
if not matched_members:
# Try matching sender name alone with more lenient matching
matched_members = match_members(tx["sender"], member_names)
if not matched_members:
unmatched.append(tx)
continue
if not matched_months:
# If no month specified, try to infer from payment date
tx_date = tx["date"]
if tx_date:
try:
dt = datetime.strptime(tx_date, "%Y-%m-%d")
# Assume payment is for the current month
matched_months = [dt.strftime("%Y-%m")]
except ValueError:
pass
if not matched_months:
if not matched_members or not matched_months:
unmatched.append(tx)
continue
# Allocate payment across matched members and months
num_allocations = len(matched_members) * len(matched_months)
per_allocation = tx["amount"] / num_allocations if num_allocations > 0 else 0
per_allocation = amount / num_allocations if num_allocations > 0 else 0
for member_name, confidence in matched_members:
# If we matched via sheet 'Person' column, name might be partial or have markers
# but usually it's the exact member name from get_members_with_fees.
# Let's ensure it exists in our ledger.
if member_name not in ledger:
# Try matching by base name if it was Jan Novak (Kačerr) etc.
pass
for month_key in matched_months:
entry = {
"amount": per_allocation,
@@ -372,16 +306,26 @@ def reconcile(
# Future month — track as credit
credits[member_name] = credits.get(member_name, 0) + int(per_allocation)
# Calculate final total balances (window + off-window credits)
final_balances: dict[str, int] = {}
for name in member_names:
window_balance = sum(
int(mdata["paid"]) - mdata["expected"]
for mdata in ledger[name].values()
)
final_balances[name] = window_balance + credits.get(name, 0)
return {
"members": {
name: {
"tier": member_tiers[name],
"months": ledger[name],
"total_balance": final_balances[name]
}
for name in member_names
},
"unmatched": unmatched,
"credits": credits,
"credits": final_balances, # Redefine credits as any positive total balance
}
@@ -452,12 +396,30 @@ def print_report(result: dict, sorted_months: list[str]):
balance = total_paid - total_expected
print(f" | {f'Expected: {total_expected}, Paid: {int(total_paid)}, Balance: {balance:+d}'}")
# --- Credits ---
if result["credits"]:
print(f"\n{'CREDITS (advance payments for future months)':}")
for name, amount in sorted(result["credits"].items()):
# --- Credits (Total Surplus) ---
all_credits = {
name: data["total_balance"]
for name, data in result["members"].items()
if data["total_balance"] > 0
}
if all_credits:
print(f"\n{'TOTAL CREDITS (advance payments or surplus):'}")
for name, amount in sorted(all_credits.items()):
print(f" {name}: {amount} CZK")
# --- Debts (Missing Payments) ---
all_debts = {
name: data["total_balance"]
for name, data in result["members"].items()
if data["total_balance"] < 0
}
if all_debts:
print(f"\n{'TOTAL DEBTS (missing payments):'}")
for name, amount in sorted(all_debts.items()):
print(f" {name}: {abs(amount)} CZK")
# --- Unmatched transactions ---
if result["unmatched"]:
print(f"\n{'UNMATCHED TRANSACTIONS (need manual review)':}")
@@ -499,13 +461,14 @@ def main():
description="Match bank payments against expected attendance fees."
)
parser.add_argument(
"--from", dest="date_from", default="2025-12-01",
help="Start date YYYY-MM-DD (default: 2025-12-01)",
"--sheet-id", default=DEFAULT_SPREADSHEET_ID, help="Google Sheet ID"
)
parser.add_argument(
"--to", dest="date_to",
default=datetime.now().strftime("%Y-%m-%d"),
help="End date YYYY-MM-DD (default: today)",
"--credentials", default=".secret/fuj-management-bot-credentials.json",
help="Path to Google API credentials JSON"
)
parser.add_argument(
"--bank", action="store_true", help="Scrape bank instead of using Sheet data"
)
args = parser.parse_args()
@@ -515,9 +478,15 @@ def main():
print("No attendance data found.")
return
print(f"Fetching transactions from {args.date_from} to {args.date_to}...")
transactions = fetch_transactions(args.date_from, args.date_to)
print(f"Found {len(transactions)} incoming transactions.\n")
if args.bank:
print(f"Fetching transactions from Fio bank ({args.date_from} to {args.date_to})...")
from fio_utils import fetch_transactions
transactions = fetch_transactions(args.date_from, args.date_to)
else:
print(f"Fetching transactions from Google Sheet ({args.sheet_id})...")
transactions = fetch_sheet_data(args.sheet_id, args.credentials)
print(f"Processing {len(transactions)} transactions.\n")
result = reconcile(members, sorted_months, transactions)
print_report(result, sorted_months)