- Added a Makefile to easily run project scripts (fees, match, web, image) - Modified attendance.py to dynamically handle a variable number of header rows from the Google Sheet - Updated both attendance calculations and calculate_fees terminal output to show actual attendance counts (e.g., '750 CZK (3)') - Created a Flask web dashboard (app.py and templates/fees.html) to view member fees in an attractive, condensed, terminal-like UI - Bound the Flask server to port 5000 and added a routing alias from '/' to '/fees' - Configured Python virtual environment (.venv) creation directly into the Makefile to resolve global pip install errors on macOS Co-authored-by: Antigravity <antigravity@deepmind.com>
102 lines
3.6 KiB
Python
102 lines
3.6 KiB
Python
"""Czech text utilities — diacritics normalization and month parsing."""
|
|
|
|
import re
|
|
import unicodedata
|
|
|
|
# Maps Czech month names to their month number (1-12).
# Keys are lowercase and diacritics-free, i.e. the form produced by
# normalize(), and cover the nominative, genitive and dative/locative
# forms of each month ("zari" is identical in all of them).
CZECH_MONTHS = {
    "leden": 1, "ledna": 1, "lednu": 1,
    "unor": 2, "unora": 2, "unoru": 2,
    "brezen": 3, "brezna": 3, "breznu": 3,
    "duben": 4, "dubna": 4, "dubnu": 4,
    "kveten": 5, "kvetna": 5, "kvetnu": 5,
    "cerven": 6, "cervna": 6, "cervnu": 6,
    "cervenec": 7, "cervence": 7, "cervenci": 7,
    # Misspelling of "cervence"; kept so input that matched before still does.
    "cervnce": 7,
    "srpen": 8, "srpna": 8, "srpnu": 8,
    "zari": 9,
    "rijen": 10, "rijna": 10, "rijnu": 10,
    "listopad": 11, "listopadu": 11,
    "prosinec": 12, "prosince": 12, "prosinci": 12,
}
|
|
|
|
|
|
def normalize(text: str) -> str:
    """Return *text* lowercased with its diacritical marks removed.

    The text is decomposed with Unicode NFKD so that accented letters
    split into a base letter plus combining marks; the combining marks
    are then dropped and the remainder is lowercased.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    # combining() returns 0 for base characters and non-zero for marks.
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    stripped = "".join(base_chars)
    return stripped.lower()
|
|
|
|
|
|
def parse_month_references(text: str, default_year: int = 2026) -> list[str]:
    """Extract YYYY-MM month references from Czech free text.

    The text is passed through ``normalize`` first, so matching ignores
    case and diacritics.

    Recognised forms:
    - Numeric month(s) followed by a slash and a year: "01/26", "1/2026",
      "11+12/2025". Several months may be joined with "+"; years below
      100 are taken as 20xx.
    - Numeric month.year with a four-digit year: "12.2025".
    - Ranges of Czech month names: "listopad-leden" (November through
      January). The separator may be a hyphen, an en dash or an em dash.
    - Standalone Czech month names in any form listed in CZECH_MONTHS.

    Month names carry no year, so it is derived from *default_year*:
    - In a range that wraps past December (start month > end month), the
      months from the start month through December get
      ``default_year - 1`` and the remaining months get ``default_year``.
    - In a non-wrapping range every month gets ``default_year``.
    - A standalone October, November or December gets
      ``default_year - 1``; any other standalone month gets
      ``default_year``.

    A month name that took part in any range is not processed again as a
    standalone name.

    Args:
        text: Free text to scan.
        default_year: Year used to resolve month names without a year.

    Returns:
        Sorted list of unique YYYY-MM strings.
    """
    normalized = normalize(text)
    results: set[str] = set()

    # Numeric months with a year after a slash, e.g. "11+12/2025", "01/26".
    # The first group captures digits and "+" so several months can share
    # one year.
    for months_part, year_str in re.findall(
        r"([\d+]+)\s*/\s*(\d{2,4})", normalized
    ):
        year = int(year_str)
        if year < 100:
            year += 2000
        for m_str in months_part.split("+"):
            # Empty pieces (e.g. from a leading "+") fail isdigit().
            if m_str.isdigit():
                m = int(m_str)
                if 1 <= m <= 12:
                    results.add(f"{year:04d}-{m:02d}")

    # Numeric month.year, e.g. "12.2025".
    for m_str, year_str in re.findall(r"(\d{1,2})\s*\.\s*(\d{4})", normalized):
        m, year = int(m_str), int(year_str)
        if 1 <= m <= 12:
            results.add(f"{year:04d}-{m:02d}")

    # Longest names first so that e.g. "listopadu" wins over "listopad".
    month_name_re = "|".join(sorted(CZECH_MONTHS, key=len, reverse=True))

    # Ranges of month names, e.g. "listopad-leden". Besides the ASCII
    # hyphen, accept U+2010 (hyphen), U+2013 (en dash) and U+2014 (em dash),
    # which normalize() leaves untouched.
    range_re = (
        rf"({month_name_re})\s*[-\u2010\u2013\u2014]\s*({month_name_re})"
    )
    found_in_ranges: set[str] = set()
    for start_name, end_name in re.findall(range_re, normalized):
        found_in_ranges.update((start_name, end_name))
        start_m = CZECH_MONTHS[start_name]
        end_m = CZECH_MONTHS[end_name]
        wraps = start_m > end_m

        # Walk from start to end, wrapping around December -> January.
        m = start_m
        while True:
            # In a wrapping range (e.g. Nov-Jan) the months from the start
            # month through December belong to the previous year.
            year = default_year - 1 if wraps and m >= start_m else default_year
            results.add(f"{year:04d}-{m:02d}")
            if m == end_m:
                break
            m = m % 12 + 1

    # Standalone month names (names already used in a range are skipped).
    for match in re.finditer(rf"\b({month_name_re})\b", normalized):
        name = match.group(1)
        if name in found_in_ranges:
            continue
        m = CZECH_MONTHS[name]
        # Heuristic: October-December is assumed to mean the previous year.
        year = default_year - 1 if m >= 10 else default_year
        results.add(f"{year:04d}-{m:02d}")

    return sorted(results)
|