#!/usr/bin/env python3 """ Přegeneruje mapu z již stažených dat (byty_sreality.json). Doplní chybějící plochy ze Sreality API, opraví URL, aplikuje filtry. """ from __future__ import annotations import argparse import json import time import urllib.request from pathlib import Path from scrape_and_map import ( generate_map, format_price, MIN_AREA, HEADERS, DETAIL_API ) def api_get(url: str) -> dict: req = urllib.request.Request(url, headers=HEADERS) with urllib.request.urlopen(req, timeout=30) as resp: return json.loads(resp.read().decode("utf-8")) def fix_sreality_url(estate: dict) -> str: """Fix the Sreality URL to include disposition segment (only if missing).""" disp = estate.get("disposition", "") slug_map = { "1+kk": "1+kk", "1+1": "1+1", "2+kk": "2+kk", "2+1": "2+1", "3+kk": "3+kk", "3+1": "3+1", "4+kk": "4+kk", "4+1": "4+1", "5+kk": "5+kk", "5+1": "5+1", "6+": "6-a-vice", "Atypický": "atypicky", } slug = slug_map.get(disp, "byt") old_url = estate.get("url", "") parts = old_url.split("/") try: byt_idx = parts.index("byt") # Only insert if disposition slug is not already there if byt_idx + 1 < len(parts) and parts[byt_idx + 1] == slug: return old_url # already correct parts.insert(byt_idx + 1, slug) return "/".join(parts) except ValueError: return old_url def fetch_area(hash_id: int) -> int | None: """Fetch area from detail API.""" try: url = DETAIL_API.format(hash_id) detail = api_get(url) for item in detail.get("items", []): name = item.get("name", "") if "žitná ploch" in name or "zitna ploch" in name.lower(): return int(item["value"]) except Exception: pass return None def main(data_dir: str = "."): data_path = Path(data_dir) json_path = data_path / "byty_sreality.json" if not json_path.exists(): print("Soubor byty_sreality.json nenalezen. Nejprve spusť scrape_and_map.py") return estates = json.loads(json_path.read_text(encoding="utf-8")) print(f"Načteno {len(estates)} bytů z byty_sreality.json") # Step 1: Fetch missing areas missing_area = [e for e in estates if e.get("area") is None] print(f"Doplňuji plochu u {len(missing_area)} bytů...") for i, e in enumerate(missing_area): time.sleep(0.3) area = fetch_area(e["hash_id"]) if area is not None: e["area"] = area if (i + 1) % 50 == 0: print(f" {i + 1}/{len(missing_area)} ...") # Count results with_area = sum(1 for e in estates if e.get("area") is not None) print(f"Plocha doplněna: {with_area}/{len(estates)}") # Step 2: Fix URLs for e in estates: e["url"] = fix_sreality_url(e) # Step 3: Filter by min area filtered = [] excluded = 0 for e in estates: area = e.get("area") if area is not None and area < MIN_AREA: excluded += 1 continue filtered.append(e) print(f"Vyloučeno (< {MIN_AREA} m²): {excluded}") print(f"Zbývá: {len(filtered)} bytů") # Save updated data filtered_path = data_path / "byty_sreality.json" filtered_path.write_text( json.dumps(filtered, ensure_ascii=False, indent=2), encoding="utf-8", ) # Generate map generate_map(filtered, output_path=str(data_path / "mapa_bytu.html")) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Regenerate map from existing data") parser.add_argument("--data-dir", type=str, default=".", help="Directory for reading/writing data files (default: current dir)") args = parser.parse_args() main(data_dir=args.data_dir)