v1
This commit is contained in:
114
regen_map.py
Normal file
114
regen_map.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Přegeneruje mapu z již stažených dat (byty_sreality.json).
|
||||
Doplní chybějící plochy ze Sreality API, opraví URL, aplikuje filtry.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
from scrape_and_map import (
|
||||
generate_map, format_price, MIN_AREA, HEADERS, DETAIL_API
|
||||
)
|
||||
|
||||
|
||||
def api_get(url: str) -> dict:
    """Fetch *url* and return its body parsed as JSON.

    The request is sent with the ``HEADERS`` imported from ``scrape_and_map``
    and a 30-second timeout.  The body is decoded as UTF-8 before parsing.
    Network and JSON-decoding errors are not handled here and propagate to
    the caller.
    """
    request = urllib.request.Request(url, headers=HEADERS)
    with urllib.request.urlopen(request, timeout=30) as response:
        raw_body = response.read()
    return json.loads(raw_body.decode("utf-8"))
|
||||
|
||||
|
||||
def fix_sreality_url(estate: dict) -> str:
    """Return the estate's URL with a disposition segment after ``byt``.

    The slug derived from ``estate["disposition"]`` is inserted directly
    after the first ``byt`` path segment.  The URL is returned unchanged when
    it has no ``byt`` segment, or when that segment is already followed by a
    disposition slug.  This includes a slug that differs from the estate's
    current disposition, so a second disposition segment is never stacked in
    front of an existing one.

    Args:
        estate: Listing record.  The ``"url"`` and ``"disposition"`` keys are
            read; a missing or ``None`` URL is treated as an empty string.

    Returns:
        The URL with the disposition segment present, or the original URL
        (``""`` when there was none) if nothing needed to change.
    """
    slug_map = {
        "1+kk": "1+kk", "1+1": "1+1", "2+kk": "2+kk", "2+1": "2+1",
        "3+kk": "3+kk", "3+1": "3+1", "4+kk": "4+kk", "4+1": "4+1",
        "5+kk": "5+kk", "5+1": "5+1", "6+": "6-a-vice", "Atypický": "atypicky",
    }
    # Unknown or missing dispositions fall back to the generic "byt" slug.
    slug = slug_map.get(estate.get("disposition", ""), "byt")

    old_url = estate.get("url") or ""
    parts = old_url.split("/")
    if "byt" not in parts:
        return old_url

    byt_idx = parts.index("byt")
    following = parts[byt_idx + 1] if byt_idx + 1 < len(parts) else None

    # Any recognised disposition slug counts as "already there", not only the
    # one matching this estate; otherwise a mismatch would yield URLs such as
    # ".../byt/3+kk/2+kk/...".  Including ``slug`` itself keeps repeated runs
    # idempotent for the "byt" fallback as well.
    known_slugs = {slug, *slug_map.values()}
    if following in known_slugs:
        return old_url

    parts.insert(byt_idx + 1, slug)
    return "/".join(parts)
|
||||
|
||||
|
||||
def fetch_area(hash_id: int) -> int | None:
    """Look up a listing's area through the detail API.

    Requests ``DETAIL_API`` formatted with *hash_id* and scans the response's
    ``"items"`` list.  The first item whose ``"name"`` contains ``"žitná
    ploch"`` (or, case-insensitively, the unaccented ``"zitna ploch"``) has
    its ``"value"`` converted to ``int`` and returned.

    Returns:
        The area as an integer, or ``None`` when no matching item exists or
        when any step (request, parsing, conversion) raises.
    """
    try:
        detail = api_get(DETAIL_API.format(hash_id))
        for entry in detail.get("items", []):
            label = entry.get("name", "")
            is_area_item = "žitná ploch" in label or "zitna ploch" in label.lower()
            if is_area_item:
                return int(entry["value"])
    except Exception:
        # Best-effort lookup: any failure simply means "area unknown".
        return None
    return None
|
||||
|
||||
|
||||
def main():
    """Regenerate the map from the already downloaded ``byty_sreality.json``.

    Steps, in order:

    1. Load the estates from ``byty_sreality.json`` (print a hint and return
       if the file does not exist).
    2. For every estate without an ``"area"``, call ``fetch_area`` (pausing
       0.3 s before each call) and store the result when one is found.
    3. Rewrite every estate's ``"url"`` via ``fix_sreality_url``.
    4. Drop estates whose known area is below ``MIN_AREA``; estates with an
       unknown area are kept.
    5. Overwrite ``byty_sreality.json`` with the remaining estates and pass
       them to ``generate_map``.
    """
    data_file = Path("byty_sreality.json")
    if not data_file.exists():
        print("Soubor byty_sreality.json nenalezen. Nejprve spusť scrape_and_map.py")
        return

    estates = json.loads(data_file.read_text(encoding="utf-8"))
    print(f"Načteno {len(estates)} bytů z byty_sreality.json")

    # Step 1: fetch missing areas.
    without_area = [estate for estate in estates if estate.get("area") is None]
    total_missing = len(without_area)
    print(f"Doplňuji plochu u {total_missing} bytů...")

    for done, estate in enumerate(without_area, start=1):
        time.sleep(0.3)  # pause between consecutive detail-API calls
        fetched = fetch_area(estate["hash_id"])
        if fetched is not None:
            estate["area"] = fetched
        if done % 50 == 0:
            print(f" {done}/{total_missing} ...")

    # Report how many estates now have an area.
    known_area = len([estate for estate in estates if estate.get("area") is not None])
    print(f"Plocha doplněna: {known_area}/{len(estates)}")

    # Step 2: fix URLs.
    for estate in estates:
        estate["url"] = fix_sreality_url(estate)

    # Step 3: filter by minimum area; an unknown area never excludes.
    def keep(estate):
        area = estate.get("area")
        return area is None or not area < MIN_AREA

    filtered = [estate for estate in estates if keep(estate)]
    excluded = len(estates) - len(filtered)

    print(f"Vyloučeno (< {MIN_AREA} m²): {excluded}")
    print(f"Zbývá: {len(filtered)} bytů")

    # Save the updated data back over the input file.
    serialized = json.dumps(filtered, ensure_ascii=False, indent=2)
    data_file.write_text(serialized, encoding="utf-8")

    # Generate the map.
    generate_map(filtered)
|
||||
|
||||
|
||||
# Script entry point: run the regeneration only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user