feat: initial dashboard implementation and robust attendance parsing

- Added a Makefile to easily run project scripts (fees, match, web, image)
- Modified attendance.py to dynamically handle a variable number of header rows from the Google Sheet
- Updated both attendance calculations and calculate_fees terminal output to show actual attendance counts (e.g., '750 CZK (3)')
- Created a Flask web dashboard (app.py and templates/fees.html) to view member fees in an attractive, condensed, terminal-like UI
- Bound the Flask server to port 5000 and added a routing alias from '/' to '/fees'
- Configured Python virtual environment (.venv) creation directly in the Makefile to resolve global pip install errors on macOS

Co-authored-by: Antigravity <antigravity@deepmind.com>
2026-02-27 13:19:00 +01:00
commit 3bfea4e0a4
16 changed files with 1322 additions and 0 deletions
--- a/scripts/czech_utils.py
+++ b/scripts/czech_utils.py
@@ -0,0 +1,101 @@
+"""Czech text utilities — diacritics normalization and month parsing."""
+
+import re
+import unicodedata
+
+# Maps Czech month-name forms to month numbers (1-12).
+# Keys are written without diacritics and in lowercase, i.e. in the shape
+# produced by normalize(), so lookups must be done on normalized text.
+# Each row lists the nominative, genitive and dative/locative form of one
+# month; "zari" (září) and "listopad" have fewer distinct forms.
+CZECH_MONTHS = {
+    "leden": 1, "ledna": 1, "lednu": 1,
+    "unor": 2, "unora": 2, "unoru": 2,
+    "brezen": 3, "brezna": 3, "breznu": 3,
+    "duben": 4, "dubna": 4, "dubnu": 4,
+    "kveten": 5, "kvetna": 5, "kvetnu": 5,
+    "cerven": 6, "cervna": 6, "cervnu": 6,
+    # Genitive of "červenec" is "července" -> "cervence".
+    "cervenec": 7, "cervence": 7, "cervenci": 7,
+    "srpen": 8, "srpna": 8, "srpnu": 8,
+    "zari": 9,
+    "rijen": 10, "rijna": 10, "rijnu": 10,
+    "listopad": 11, "listopadu": 11,
+    "prosinec": 12, "prosince": 12, "prosinci": 12,
+}
+
+
+def normalize(text: str) -> str:
+    """Return *text* lowercased with its diacritical marks removed.
+
+    The text is decomposed with Unicode NFKD so that accented letters split
+    into a base character plus combining marks; the combining marks are then
+    dropped and the remainder is lowercased.
+    """
+    decomposed = unicodedata.normalize("NFKD", text)
+    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
+    return "".join(base_chars).lower()
+
+
+def parse_month_references(text: str, default_year: int = 2026) -> list[str]:
+    """Extract YYYY-MM month references from Czech free text.
+
+    The text is first passed through ``normalize`` (diacritics stripped,
+    lowercased) and then scanned for four kinds of reference:
+
+    - Numeric month(s) followed by "/year": "01/26", "1/2026", "11+12/2025"
+      (months joined by "+" share one year). The year must be exactly two
+      or four digits; a year below 100 is read as 20xx.
+    - Numeric "month.year" with a four-digit year: "12.2025".
+    - Ranges of Czech month names: "listopad-leden" (November through
+      January). A hyphen, en dash or em dash may separate the two names.
+      When the range wraps past December, the months from the start month
+      through December go to ``default_year - 1`` and the rest to
+      ``default_year``; a non-wrapping range is entirely in ``default_year``.
+    - Standalone Czech month names in any form listed in ``CZECH_MONTHS``.
+      October, November and December are assigned to ``default_year - 1``,
+      all other months to ``default_year``.
+
+    Args:
+        text: Free-form text to scan.
+        default_year: Year used for month names, which carry no year of
+            their own.
+
+    Returns:
+        Sorted list of unique "YYYY-MM" strings; empty if nothing matched.
+    """
+    normalized = normalize(text)
+    results: set[str] = set()
+
+    # Numeric months before "/year". The character class lets runs such as
+    # "11+12" through as one token; pieces that are not pure digits (e.g. the
+    # empty string left by a stray "+") are discarded below. The year is
+    # exactly 4 or 2 digits and must not continue into further digits, so
+    # "1/202" or "1/20261234" is not misread as a year.
+    for months_part, year_str in re.findall(
+        r"([\d+]+)\s*/\s*(\d{4}|\d{2})(?!\d)", normalized
+    ):
+        year = int(year_str)
+        if year < 100:
+            year += 2000
+        for m_str in months_part.split("+"):
+            if m_str.isdigit() and 1 <= int(m_str) <= 12:
+                results.add(f"{year:04d}-{int(m_str):02d}")
+
+    # Numeric month.year with a four-digit year, e.g. "12.2025".
+    for m_str, year_str in re.findall(r"(\d{1,2})\s*\.\s*(\d{4})", normalized):
+        m, year = int(m_str), int(year_str)
+        if 1 <= m <= 12:
+            results.add(f"{year:04d}-{m:02d}")
+
+    # Longest names first so that e.g. "cervenec" wins over its prefix
+    # "cerven" in the alternation.
+    month_name_re = "|".join(sorted(CZECH_MONTHS, key=len, reverse=True))
+
+    # Month-name ranges such as "listopad-leden". En/em dashes are not
+    # altered by normalize(), so they are matched explicitly.
+    range_matches = re.findall(
+        rf"({month_name_re})\s*[-\u2013\u2014]\s*({month_name_re})", normalized
+    )
+    names_in_ranges: set[str] = set()
+    for start_name, end_name in range_matches:
+        names_in_ranges.update((start_name, end_name))
+        start_m = CZECH_MONTHS[start_name]
+        end_m = CZECH_MONTHS[end_name]
+        wraps = start_m > end_m
+        # Walk from start to end inclusive, wrapping December -> January.
+        m = start_m
+        while True:
+            # In a wrapping range (e.g. Nov-Jan) the months before the
+            # wrap-around belong to the previous year.
+            year = default_year - 1 if wraps and m >= start_m else default_year
+            results.add(f"{year:04d}-{m:02d}")
+            if m == end_m:
+                break
+            m = m % 12 + 1
+
+    # Standalone month names. A name is skipped when the same word form was
+    # an endpoint of any range above (the check is by form, not by position),
+    # because the range has already contributed that month.
+    for match in re.finditer(rf"\b({month_name_re})\b", normalized):
+        name = match.group(1)
+        if name in names_in_ranges:
+            continue
+        m = CZECH_MONTHS[name]
+        # Heuristic: October-December are taken to mean the previous year.
+        year = default_year - 1 if m >= 10 else default_year
+        results.add(f"{year:04d}-{m:02d}")
+
+    return sorted(results)