"""Shared utilities for scraper run statistics and listing validation.""" from __future__ import annotations import json import logging import os from pathlib import Path HERE = Path(__file__).parent DATA_DIR = Path(os.environ.get("DATA_DIR", HERE)) _val_log = logging.getLogger(__name__) _REQUIRED_FIELDS = ("hash_id", "price", "locality", "lat", "lon", "url", "source") def validate_listing(listing: dict, context: str = "") -> bool: """ Validate a listing dict before it is written to the output JSON. Returns True if valid, False if the listing should be skipped. Logs a warning for each invalid listing. """ prefix = f"[{context}] " if context else "" for field in _REQUIRED_FIELDS: val = listing.get(field) if val is None or val == "": _val_log.warning(f"{prefix}Skipping listing — missing field '{field}': {listing.get('hash_id', '?')}") return False price = listing.get("price") if not isinstance(price, (int, float)) or price <= 0: _val_log.warning(f"{prefix}Skipping listing — invalid price={price!r}: {listing.get('hash_id', '?')}") return False lat, lon = listing.get("lat"), listing.get("lon") if not isinstance(lat, (int, float)) or not isinstance(lon, (int, float)): _val_log.warning(f"{prefix}Skipping listing — non-numeric GPS lat={lat!r} lon={lon!r}: {listing.get('hash_id', '?')}") return False if not (47.0 <= lat <= 52.0) or not (12.0 <= lon <= 19.0): _val_log.warning(f"{prefix}Skipping listing — GPS outside Czech Republic lat={lat} lon={lon}: {listing.get('hash_id', '?')}") return False area = listing.get("area") if area is not None and (not isinstance(area, (int, float)) or area <= 0): _val_log.warning(f"{prefix}Skipping listing — invalid area={area!r}: {listing.get('hash_id', '?')}") return False return True def write_stats(filename: str, stats: dict) -> None: """Write scraper run stats dict to the data directory.""" path = DATA_DIR / filename path.write_text(json.dumps(stats, ensure_ascii=False, indent=2), encoding="utf-8")