feat: initial dashboard implementation and robust attendance parsing
- Added a Makefile to easily run project scripts (fees, match, web, image) - Modified attendance.py to dynamically handle a variable number of header rows from the Google Sheet - Updated both attendance calculations and calculate_fees terminal output to show actual attendance counts (e.g., '750 CZK (3)') - Created a Flask web dashboard (app.py and templates/fees.html) to view member fees in an attractive, condensed, terminal-like UI - Bound the Flask server to port 5000 and added a routing alias from '/' to '/fees' - Configured Python virtual environment (.venv) creation directly into the Makefile to resolve global pip install errors on macOS Co-authored-by: Antigravity <antigravity@deepmind.com>
This commit is contained in:
101
scripts/czech_utils.py
Normal file
101
scripts/czech_utils.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""Czech text utilities — diacritics normalization and month parsing."""
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
# Maps diacritics-free, lowercase Czech month names (as produced by
# ``normalize``) to month numbers 1-12. Each month lists the declensions
# expected in free text: nominative, genitive and dative/locative.
CZECH_MONTHS: dict[str, int] = {
    "leden": 1, "ledna": 1, "lednu": 1,
    "unor": 2, "unora": 2, "unoru": 2,
    "brezen": 3, "brezna": 3, "breznu": 3,
    "duben": 4, "dubna": 4, "dubnu": 4,
    "kveten": 5, "kvetna": 5, "kvetnu": 5,
    "cerven": 6, "cervna": 6, "cervnu": 6,
    # Genitive of "červenec" is "července" -> "cervence".
    "cervenec": 7, "cervence": 7, "cervenci": 7,
    # Misspelled key shipped originally; kept so existing lookups still work.
    "cervnce": 7,
    "srpen": 8, "srpna": 8, "srpnu": 8,
    # "září" has the same form in all of the cases listed above.
    "zari": 9,
    "rijen": 10, "rijna": 10, "rijnu": 10,
    "listopad": 11, "listopadu": 11,
    "prosinec": 12, "prosince": 12, "prosinci": 12,
}
|
||||
|
||||
|
||||
def normalize(text: str) -> str:
    """Return *text* lowercased with diacritical marks removed.

    The text is decomposed with Unicode NFKD so that accented letters split
    into a base character plus combining marks; the combining marks are then
    dropped and the remainder is lowercased.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    # unicodedata.combining() returns 0 for non-combining characters.
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    return "".join(base_chars).lower()
|
||||
|
||||
|
||||
def parse_month_references(text: str, default_year: int = 2026) -> list[str]:
    """Extract YYYY-MM month references from Czech free text.

    The text is first passed through ``normalize`` (diacritics stripped,
    lowercased) and then scanned for:

    - Numeric month/year: "01/26", "1/2026". Several months may share one
      year when joined with "+": "11+12/2025". Years below 100 are read
      as 20YY.
    - Numeric month.year with a four-digit year: "12.2025".
    - Ranges of Czech month names: "listopad-leden" (November through
      January). The separator may be an ASCII hyphen or a Unicode dash
      (e.g. an en dash).
    - Standalone Czech month names, in the declensions listed in
      ``CZECH_MONTHS``.

    Month names carry no year, so one is derived from *default_year*:

    - In a range that wraps past December (start month > end month), the
      months from the start month through December get ``default_year - 1``
      and the rest get ``default_year``. Non-wrapping ranges use
      ``default_year`` throughout.
    - A standalone October, November or December gets ``default_year - 1``;
      any other standalone month gets ``default_year``.

    Args:
        text: Free text to scan.
        default_year: Year used to resolve month names that have no
            explicit year.

    Returns:
        Sorted list of unique "YYYY-MM" strings.
    """
    normalized = normalize(text)
    results: set[str] = set()

    # Numeric months with a year: one or more "+"-joined month numbers
    # followed by "/<year>", e.g. "11+12/2025", "01/26", "1/2026".
    for months_part, year_str in re.findall(
        r"([\d+]+)\s*/\s*(\d{2,4})", normalized
    ):
        year = int(year_str)
        if year < 100:
            year += 2000
        for m_str in months_part.split("+"):
            # Empty pieces (from "++" or a leading/trailing "+") are skipped.
            if m_str.isdigit():
                m = int(m_str)
                if 1 <= m <= 12:
                    results.add(f"{year:04d}-{m:02d}")

    # Numeric month.year, e.g. "12.2025".
    for m_str, year_str in re.findall(r"(\d{1,2})\s*\.\s*(\d{4})", normalized):
        m, year = int(m_str), int(year_str)
        if 1 <= m <= 12:
            results.add(f"{year:04d}-{m:02d}")

    # Longest names first so that e.g. "cervenec" wins over its prefix
    # "cerven" in the alternation.
    month_name_re = "|".join(sorted(CZECH_MONTHS, key=len, reverse=True))

    # ASCII hyphen plus the Unicode hyphen / figure dash / en dash / em dash /
    # minus sign, none of which NFKD normalization folds into "-".
    dash_re = r"[-\u2010\u2012\u2013\u2014\u2212]"

    # Ranges of month names, e.g. "listopad-leden".
    found_in_ranges: set[str] = set()
    for start_name, end_name in re.findall(
        rf"({month_name_re})\s*{dash_re}\s*({month_name_re})", normalized
    ):
        found_in_ranges.update((start_name, end_name))
        start_m = CZECH_MONTHS[start_name]
        end_m = CZECH_MONTHS[end_name]
        wraps = start_m > end_m
        # Number of month steps from start to end, wrapping December->January.
        steps = (end_m - start_m) % 12
        for offset in range(steps + 1):
            m = (start_m - 1 + offset) % 12 + 1
            # In a wrapping range the months before the year boundary
            # (start month through December) belong to the previous year.
            year = default_year - 1 if wraps and m >= start_m else default_year
            results.add(f"{year:04d}-{m:02d}")

    # Standalone month names. A name that took part in any range is skipped
    # everywhere in the text, not only at the range's position.
    for match in re.finditer(rf"\b({month_name_re})\b", normalized):
        name = match.group(1)
        if name in found_in_ranges:
            continue
        m = CZECH_MONTHS[name]
        # Heuristic: October-December are assumed to refer to the year
        # before default_year.
        year = default_year - 1 if m >= 10 else default_year
        results.add(f"{year:04d}-{m:02d}")

    return sorted(results)
|
||||
Reference in New Issue
Block a user