- Replace print() with Python logging module across all 6 scrapers for configurable log levels (DEBUG/INFO/WARNING/ERROR) - Add --max-pages, --max-properties, and --log-level CLI arguments to each scraper via argparse for limiting scrape scope - Add validation Make targets (validation, validation-local, validation-local-debug) for quick test runs with limited data - Update run_all.sh to parse and forward CLI args to all scrapers - Update mapa_bytu.html with latest scrape results Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
900 lines
33 KiB
Python
900 lines
33 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Sreality scraper + interactive map generator.
|
|
Hledá byty na prodej v Praze podle zadaných kritérií a generuje HTML mapu.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import math
|
|
import time
|
|
import urllib.request
|
|
import urllib.parse
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger(__name__)

# ── Filter configuration ────────────────────────────────────────────────────

MAX_PRICE = 13_500_000  # CZK
MIN_AREA = 69  # m² — flats smaller than this are excluded
DISPOSITIONS = [6, 7, 8, 9, 10, 11, 12]  # 3+kk, 3+1, 4+kk, 4+1, 5+kk, 5+1, 6+
MIN_FLOOR = 2  # download from the 2nd floor up; 2nd-floor flats are flagged separately on the map
REGION_ID = 10  # Prague
PER_PAGE = 60

# Sreality API endpoints: the listing endpoint and the per-estate detail
# endpoint (the ``{}`` placeholder is filled with the estate's hash_id).
API_BASE = "https://www.sreality.cz/api/cs/v2/estates"
DETAIL_API = API_BASE + "/{}"

# Keywords for excluding panel buildings / housing estates.
PANEL_KEYWORDS = {
    "panel",
    "panelový",
    "panelový dům",
    "panelák",
}
SIDLISTE_KEYWORDS = {
    "sídliště",
    "sidliste",
}

# Headers sent with every API request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    "Accept": "application/json",
}
|
|
|
|
|
|
def api_get(url: str) -> dict:
    """Download *url* and return its body parsed as JSON.

    The request carries the module-level ``HEADERS`` and uses a 30 second
    timeout.  The body is decoded as UTF-8 before parsing.

    Raises:
        OSError: on any network-level failure.  The error is logged with its
            traceback and then re-raised unchanged.
    """
    logger.debug(f"HTTP GET request: {url}")
    logger.debug(f"Headers: {HEADERS}")
    request = urllib.request.Request(url, headers=HEADERS)
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            body = resp.read().decode("utf-8")
            logger.debug(f"HTTP response: status={resp.status}, size={len(body)} bytes")
            logger.debug(f"Response preview: {body[:200]}")
            return json.loads(body)
    # URLError and ConnectionError are both subclasses of OSError, so this
    # single clause catches exactly the same set of exceptions.
    except OSError as exc:
        logger.error(f"HTTP request failed for {url}: {exc}", exc_info=True)
        raise
|
|
|
|
|
|
def build_list_url(disposition: int, page: int = 1) -> str:
    """Return the listing-API URL for one disposition code and result page.

    The query combines the fixed category codes with the module-level filter
    constants (``REGION_ID``, ``MAX_PRICE``, ``MIN_FLOOR``, ``PER_PAGE``).
    """
    query = urllib.parse.urlencode(
        {
            "category_main_cb": 1,  # flats
            "category_type_cb": 1,  # for sale
            "category_sub_cb": disposition,
            "locality_region_id": REGION_ID,
            "czk_price_summary_order2": f"0|{MAX_PRICE}",
            "floor_number": f"{MIN_FLOOR}|99",
            "per_page": PER_PAGE,
            "page": page,
        }
    )
    return API_BASE + "?" + query
|
|
|
|
|
|
def fetch_estates_for_disposition(disposition: int, max_pages: int | None = None) -> list[dict]:
    """Fetch the listing entries for one disposition, following pagination.

    Page 1 is always requested because it carries ``result_size``, from which
    the page count is derived.  Pages 2..N are then fetched with a 0.5 s pause
    before each request.

    Args:
        disposition: Sreality ``category_sub_cb`` code.
        max_pages: Optional upper bound on the number of pages to read.  Since
            page 1 is always fetched, values below 1 behave like 1.

    Returns:
        The concatenated ``_embedded.estates`` lists of every fetched page.
    """
    logger.info(f"Fetching disposition {disposition}, page 1 ...")
    data = api_get(build_list_url(disposition, page=1))
    total = data.get("result_size", 0)
    # Copy so that extending the result never mutates the response payload.
    estates = list(data.get("_embedded", {}).get("estates", []))
    total_pages = math.ceil(total / PER_PAGE) if total > 0 else 0

    logger.info(f"→ {total} results, {total_pages} pages")

    # Report the limit only when it actually cuts pages off; previously the
    # message was emitted whenever max_pages was set.
    if max_pages is not None and total_pages > max_pages:
        logger.debug(f"Max pages limit reached: limiting {total_pages} pages to {max_pages}")
        total_pages = max_pages

    for page in range(2, total_pages + 1):
        time.sleep(0.5)
        logger.info(f"Fetching page {page}/{total_pages} ...")
        data = api_get(build_list_url(disposition, page=page))
        estates.extend(data.get("_embedded", {}).get("estates", []))

    return estates
|
|
|
|
|
|
def get_estate_detail(hash_id: int) -> dict | None:
    """Fetch the detail record of one estate, best-effort.

    Returns the decoded detail payload, or ``None`` when the request or its
    decoding fails for any reason.  The failure is logged as a warning with
    its traceback.
    """
    try:
        detail_url = DETAIL_API.format(hash_id)
        logger.debug(f"Fetching detail for hash_id={hash_id}")
        payload = api_get(detail_url)
        logger.debug(f"Detail fetched for hash_id={hash_id}, keys: {list(payload.keys())[:5]}")
    except Exception as err:
        # Deliberately broad: one broken listing must not abort the whole run.
        logger.warning(f"Could not fetch detail for hash_id={hash_id}: {err}", exc_info=True)
        return None
    return payload
|
|
|
|
|
|
def parse_floor_from_detail(detail: dict) -> int | None:
    """Extract the floor number from the ``items`` list of an estate detail.

    Looks for an item named ``"Podlaží"`` (or the unaccented ``"Podlazi"``)
    whose value starts with the floor number followed by a dot, e.g.
    ``"3. podlaží z celkem 5 ..."``.

    Returns:
        The floor as an int, or ``None`` when no such item exists or none of
        the matching items has a parseable value.
    """
    for item in detail.get("items", []):
        if item.get("name") not in ("Podlaží", "Podlazi"):
            continue
        raw = item.get("value")
        if raw is None:
            continue
        # str() guards against numeric values; the original called .split()
        # on the raw value and crashed on anything that was not a string.
        leading = str(raw).split(".", 1)[0].strip()
        try:
            return int(leading)
        except ValueError:
            # Not a number (e.g. a textual floor name): try the next item.
            continue
    return None
|
|
|
|
|
|
def is_panel_or_sidliste(detail: dict) -> tuple[bool, str]:
    """Decide whether an estate is panel construction or on a housing estate.

    Only the structured ``items`` of the detail are inspected:

    * an item named "Stavba" / "Konstrukce" whose value contains "panel";
    * an item named "Umístění objektu" whose value contains "sídliště"
      (accented or not).

    Names and values are compared case-insensitively.  The free-text
    description is not consulted; the original computed it into unused
    locals, and that dead code raised when ``detail["text"]`` was not a dict.

    Returns:
        ``(should_exclude, reason)`` where ``reason`` joins every matching
        finding with ``"; "`` and is empty when nothing matched.
    """
    reasons = []
    for item in detail.get("items", []):
        name = (item.get("name") or "").lower()
        value = str(item.get("value") or "").lower()

        if name in ("stavba", "konstrukce") and "panel" in value:
            reasons.append(f"stavba: {value}")

        if name in ("umístění objektu", "umisteni objektu") and (
            "sídliště" in value or "sidliste" in value
        ):
            reasons.append(f"umístění: {value}")

    return (bool(reasons), "; ".join(reasons))
|
|
|
|
|
|
def disposition_label(sub_cb: int) -> str:
    """Map a ``category_sub_cb`` code to its display label (``"?"`` if unknown)."""
    names = {
        2: "1+kk",
        3: "1+1",
        4: "2+kk",
        5: "2+1",
        6: "3+kk",
        7: "3+1",
        8: "4+kk",
        9: "4+1",
        10: "5+kk",
        11: "5+1",
        12: "6+",
        16: "Atypický",
    }
    if sub_cb in names:
        return names[sub_cb]
    return "?"
|
|
|
|
|
|
def disposition_url_slug(sub_cb: int) -> str:
    """Map a ``category_sub_cb`` code to the path segment used in listing URLs.

    Unknown codes fall back to the generic segment ``"byt"``.
    """
    segments = {
        2: "1+kk",
        3: "1+1",
        4: "2+kk",
        5: "2+1",
        6: "3+kk",
        7: "3+1",
        8: "4+kk",
        9: "4+1",
        10: "5+kk",
        11: "5+1",
        12: "6-a-vice",
        16: "atypicky",
    }
    if sub_cb in segments:
        return segments[sub_cb]
    return "byt"
|
|
|
|
|
|
def sreality_url(hash_id: int, seo: dict) -> str:
    """Build the human-readable detail-page URL for an estate.

    Args:
        hash_id: Estate identifier; becomes the last path segment.
        seo: The listing's ``seo`` dict.  Missing keys default to a flat
            ("byt") for sale ("prodej") in "praha"; an unknown sub-category
            falls back to whatever ``disposition_url_slug`` returns.
    """
    type_slugs = {1: "prodej", 2: "pronajem"}
    main_slugs = {1: "byt", 2: "dum", 3: "pozemek", 4: "komercni"}

    type_part = type_slugs.get(seo.get("category_type_cb", 1), "prodej")
    main_part = main_slugs.get(seo.get("category_main_cb", 1), "byt")
    sub_part = disposition_url_slug(seo.get("category_sub_cb", 0))
    place = seo.get("locality", "praha")

    return f"https://www.sreality.cz/detail/{type_part}/{main_part}/{sub_part}/{place}/{hash_id}"
|
|
|
|
|
|
def format_price(price: int) -> str:
    """Format a CZK price with space-separated thousands, e.g. ``"13 500 000 Kč"``.

    The value is converted with ``int()``, so numeric strings are accepted and
    the fractional part of a float is discarded.  Negative amounts keep the
    sign attached to the first group.  The previous manual slicing produced
    ``"- 123 456 Kč"`` for such values.

    Anything that cannot be converted to an int (e.g. ``None``) is rendered
    verbatim in front of the currency suffix instead of raising.
    """
    try:
        amount = int(price)
    except (TypeError, ValueError):
        return f"{price} Kč"
    # "," is Python's built-in thousands separator; swap it for a space.
    return f"{amount:,}".replace(",", " ") + " Kč"
|
|
|
|
|
|
# ── Hlavní scraping ─────────────────────────────────────────────────────────
|
|
|
|
def load_cache(json_path: str = "byty_sreality.json") -> dict[int, dict]:
    """Load a previous run's JSON output as a cache keyed by ``hash_id``.

    The cache is an optimisation only, so every problem with the file is
    treated as "no cache" rather than as an error.

    Returns:
        ``{hash_id: record}`` for each dict record in the file that has a
        ``hash_id`` key.  An empty dict is returned when the file is missing,
        unreadable, not valid UTF-8 JSON, or does not contain a JSON list.
    """
    path = Path(json_path)
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(data, list):
        # Valid JSON of the wrong shape (e.g. an object) used to raise TypeError.
        return {}
    return {
        record["hash_id"]: record
        for record in data
        if isinstance(record, dict) and "hash_id" in record
    }
|
|
|
|
|
|
def _detail_item_value(detail: dict, names: tuple[str, ...]):
    """Return the ``value`` of the first detail item whose ``name`` is in *names*, else None."""
    for item in detail.get("items", []):
        if item.get("name") in names:
            return item.get("value")
    return None


def _detail_usable_area(detail: dict) -> int | None:
    """Return the usable area from the detail items as an int, or None if absent or unparseable."""
    for item in detail.get("items", []):
        name = item.get("name") or ""
        # The field name can arrive truncated ("Užitná ploch" / "Užitná plocha").
        if "žitná ploch" in name or "zitna ploch" in name.lower():
            try:
                return int(item["value"])
            except (ValueError, KeyError, TypeError):
                # Only the first matching item is considered, as before.
                return None
    return None


def _first_image_href(estate: dict) -> str:
    """Return the ``href`` of the listing's first image, or "" when it has none."""
    images = estate.get("_links", {}).get("images")
    if not images:
        return ""
    return images[0].get("href", "")


def scrape(max_pages: int | None = None, max_properties: int | None = None):
    """Download listings for every configured disposition and filter them.

    Steps:

    1. Fetch the listing pages of each code in ``DISPOSITIONS``.
    2. De-duplicate by ``hash_id``.
    3. Skip listings without GPS coordinates.
    4. Reuse the cached record when the ``hash_id`` is cached and the price is
       unchanged; otherwise fetch the detail.
    5. Drop panel/housing-estate buildings and flats smaller than ``MIN_AREA``.

    Args:
        max_pages: Optional page limit per disposition.
        max_properties: Optional cap on the number of detail requests.  Every
            request counts, whether the estate is kept, filtered out, or the
            request fails.  Cache hits are free.

    Returns:
        A list of result dicts (fresh records and reused cache records).
    """
    all_estates_raw = []
    cache = load_cache()

    logger.info("=" * 60)
    logger.info("Stahuji inzeráty ze Sreality.cz")
    logger.info(f"Cena: do {format_price(MAX_PRICE)}")
    logger.info(f"Dispozice: {', '.join(disposition_label(d) for d in DISPOSITIONS)}")
    logger.info(f"Patro: od {MIN_FLOOR}. NP")
    logger.info("Region: Praha")
    if cache:
        logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
    if max_pages is not None:
        logger.info(f"Limit stran: {max_pages}")
    if max_properties is not None:
        logger.info(f"Limit majetků: {max_properties}")
    logger.info("=" * 60)

    for disp in DISPOSITIONS:
        logger.info(f"\n▸ Dispozice: {disposition_label(disp)}")
        estates = fetch_estates_for_disposition(disp, max_pages=max_pages)
        for e in estates:
            # Remember which query produced the listing; used for the label below.
            e["_disposition_cb"] = disp
        all_estates_raw.extend(estates)
        time.sleep(0.5)

    # Deduplicate by hash_id (listings without one are dropped).
    seen = set()
    unique_estates = []
    for e in all_estates_raw:
        hid = e.get("hash_id")
        if hid and hid not in seen:
            seen.add(hid)
            unique_estates.append(e)

    total_unique = len(unique_estates)
    logger.info(f"\n{'=' * 60}")
    logger.info(f"Staženo celkem: {total_unique} unikátních inzerátů")
    logger.info("Stahuji detaily pro filtrování panelu/sídlišť...")
    logger.info(f"{'=' * 60}")

    results = []
    excluded_panel = 0
    excluded_no_gps = 0
    excluded_no_detail = 0
    excluded_small = 0
    cache_hits = 0
    details_fetched = 0

    for i, estate in enumerate(unique_estates):
        # Progress is reported before any `continue`, so cache hits and
        # exclusions no longer suppress it.
        if i and i % 20 == 0:
            logger.info(f"Zpracováno {i}/{total_unique} ...")

        if max_properties is not None and details_fetched >= max_properties:
            logger.debug(f"Max properties limit reached: {max_properties}")
            break

        hash_id = estate.get("hash_id")
        gps = estate.get("gps", {})

        if not gps or not gps.get("lat") or not gps.get("lon"):
            excluded_no_gps += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (no GPS)")
            continue

        # Cache hit: same hash_id and unchanged price — reuse the old record.
        cached = cache.get(hash_id)
        if cached and cached.get("price") == estate.get("price", 0):
            cache_hits += 1
            logger.debug(f"Cache hit for hash_id={hash_id}")
            results.append(cached)
            continue

        time.sleep(0.3)
        detail = get_estate_detail(hash_id)
        # Count the request itself.  Counting only accepted results let a
        # limited run issue far more requests than --max-properties allows.
        details_fetched += 1
        if not detail:
            excluded_no_detail += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (no detail)")
            continue

        is_excluded, reason = is_panel_or_sidliste(detail)
        if is_excluded:
            excluded_panel += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (panel/sídliště): {reason}")
            logger.info(f"✗ Vyloučen #{hash_id}: {reason}")
            continue

        floor = parse_floor_from_detail(detail)
        area = _detail_usable_area(detail)

        # Listings with an unknown area are kept; only known-small ones are dropped.
        if area is not None and area < MIN_AREA:
            excluded_small += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (area {area} m² < {MIN_AREA} m²)")
            logger.info(f"✗ Vyloučen #{hash_id}: malá plocha ({area} m²)")
            continue

        building_type = _detail_item_value(detail, ("Stavba", "Konstrukce"))
        ownership = _detail_item_value(detail, ("Vlastnictví", "Vlastnictvi"))

        seo = estate.get("seo", {})
        disp_cb = estate.get("_disposition_cb") or seo.get("category_sub_cb")
        price = estate.get("price", 0)

        results.append(
            {
                "hash_id": hash_id,
                "name": estate.get("name", ""),
                "price": price,
                "price_formatted": format_price(price),
                "locality": estate.get("locality", ""),
                "lat": gps["lat"],
                "lon": gps["lon"],
                "disposition": disposition_label(disp_cb),
                "floor": floor,
                "area": area,
                "building_type": building_type,
                "ownership": ownership,
                "url": sreality_url(hash_id, seo),
                "image": _first_image_href(estate),
            }
        )

    logger.info(f"\n{'=' * 60}")
    logger.info("Výsledky:")
    logger.info(f" Celkem staženo: {total_unique}")
    logger.info(f" Z cache (přeskočeno): {cache_hits}")
    logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
    logger.info(f" Vyloučeno (<{MIN_AREA} m²): {excluded_small}")
    logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
    logger.info(f" Vyloučeno (bez detailu): {excluded_no_detail}")
    logger.info(f" ✓ Vyhovující byty: {len(results)}")
    logger.info(f"{'=' * 60}")

    return results
|
|
|
|
|
|
# ── Generování HTML mapy ────────────────────────────────────────────────────
|
|
|
|
def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html") -> str:
    """Render *estates* as a standalone Leaflet.js HTML map and write it to disk.

    Each estate dict must provide ``disposition``, ``floor``, ``area``,
    ``building_type``, ``ownership``, ``price``, ``locality``, ``url``,
    ``lat`` and ``lon``.  ``source`` (default ``"sreality"``) and ``hash_id``
    (default ``""``) are optional.  Estates whose source is ``"psn"`` or
    ``"cityhome"`` are drawn as heart markers, all others as circle markers.

    Scraped text is HTML-escaped before it enters the popup markup, and the
    popup is embedded in the inline script as a JSON-encoded string literal.
    This way quotes, backslashes, newlines or ``</script>`` inside listing
    data cannot break the page.

    Args:
        estates: Records to plot.
        output_path: Destination HTML file (overwritten).

    Returns:
        The absolute path of the written file, as a string.
    """
    # Imported locally because ``html`` is not among the file's top-level imports.
    from html import escape

    def js_string(value) -> str:
        """Return ``str(value)`` as a JS string literal safe inside an inline <script>."""
        literal = json.dumps(str(value), ensure_ascii=False)
        # "<" is escaped so "</script>" / "<!--" cannot terminate the script
        # block; U+2028/U+2029 are line terminators in older JS engines.
        return (
            literal.replace("<", "\\u003c")
            .replace("\u2028", "\\u2028")
            .replace("\u2029", "\\u2029")
        )

    # Marker colour by disposition.
    color_map = {
        "3+kk": "#2196F3",  # blue
        "3+1": "#4CAF50",  # green
        "4+kk": "#FF9800",  # orange
        "4+1": "#F44336",  # red
        "5+kk": "#9C27B0",  # purple
        "5+1": "#795548",  # brown
        "6+": "#607D8B",  # grey-blue
    }
    source_labels = {"sreality": "Sreality", "realingo": "Realingo", "bezrealitky": "Bezrealitky", "idnes": "iDNES", "psn": "PSN", "cityhome": "CityHome"}
    source_colors = {"sreality": "#1976D2", "realingo": "#00897B", "bezrealitky": "#E91E63", "idnes": "#FF6F00", "psn": "#D32F2F", "cityhome": "#D32F2F"}

    marker_calls = []
    for e in estates:
        color = color_map.get(e["disposition"], "#999999")
        # NOTE: the client-side filters parse "<n>. NP" and "<digits> Kč" back
        # out of the popup text, so these formats must stay as they are.
        floor_text = escape(f'{e["floor"]}. NP') if e["floor"] else "neuvedeno"
        area_text = escape(f'{e["area"]} m²') if e["area"] else "neuvedeno"
        building_text = escape(str(e["building_type"] or "neuvedeno"))
        ownership_text = escape(str(e["ownership"] or "neuvedeno"))

        # Warning shown for 2nd-floor flats.
        floor_note = ""
        if e["floor"] == 2:
            floor_note = '<br><span style="color:#FF9800;font-weight:bold;">⚠ 2. NP — zvážit klidnost lokality</span>'

        source = e.get("source", "sreality")
        source_label = escape(str(source_labels.get(source, source)))
        source_color = source_colors.get(source, "#999")

        hash_id = e.get("hash_id", "")

        popup = (
            f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{escape(str(hash_id))}">'
            f'<b style="font-size:14px;">{format_price(e["price"])}</b>'
            f'<span style="margin-left:8px;font-size:11px;background:{source_color};color:white;'
            f'padding:1px 6px;border-radius:3px;">{source_label}</span><br>'
            f'<span style="color:#666;">{escape(str(e["disposition"]))} | {area_text} | {floor_text}</span>'
            f'{floor_note}<br><br>'
            f'<b>{escape(str(e["locality"]))}</b><br>'
            f'Stavba: {building_text}<br>'
            f'Vlastnictví: {ownership_text}<br><br>'
            f'<a href="{escape(str(e["url"]))}" target="_blank" '
            f'style="color:{source_color};text-decoration:none;font-weight:bold;">'
            f'→ Otevřít na {source_label}</a>'
            f'<div style="margin-top:10px;padding-top:8px;border-top:1px solid #eee;">'
            f'<div style="display:flex;gap:6px;align-items:center;">'
            f'<button class="rate-btn fav-btn" data-action="fav" '
            f'style="padding:4px 12px;border:1px solid #ccc;border-radius:4px;'
            f'cursor:pointer;font-size:16px;background:#fff;">⭐</button>'
            f'<button class="rate-btn rej-btn" data-action="reject" '
            f'style="padding:4px 12px;border:1px solid #ccc;border-radius:4px;'
            f'cursor:pointer;font-size:16px;background:#fff;">🚫</button>'
            f'<span class="rating-status" style="margin-left:6px;font-size:12px;color:#999;"></span>'
            f'</div>'
            f'<textarea class="rating-note" placeholder="Poznámka..." '
            f'style="width:100%;margin-top:6px;padding:4px;border:1px solid #ddd;'
            f'border-radius:4px;font-size:12px;resize:vertical;min-height:32px;'
            f'display:none;font-family:system-ui,sans-serif;"></textarea>'
            f'</div></div>'
        )

        marker_fn = "addHeartMarker" if source in ("psn", "cityhome") else "addMarker"
        marker_calls.append(
            f"    {marker_fn}({e['lat']}, {e['lon']}, {js_string(color)}, "
            f"{js_string(popup)}, {js_string(hash_id)});\n"
        )
    markers_js = "".join(marker_calls)

    # Legend: one row per disposition that occurs at least once.
    disp_counts = {}
    for e in estates:
        d = e["disposition"]
        disp_counts[d] = disp_counts.get(d, 0) + 1

    legend_parts = []
    for disp, color in color_map.items():
        count = disp_counts.get(disp, 0)
        if count > 0:
            legend_parts.append(
                f'<div style="display:flex;align-items:center;gap:6px;margin:3px 0;">'
                f'<span style="width:14px;height:14px;border-radius:50%;'
                f'background:{color};display:inline-block;border:2px solid white;'
                f'box-shadow:0 1px 3px rgba(0,0,0,0.3);"></span>'
                f'<span>{disp} ({count})</span></div>'
            )

    # Extra legend row for the heart markers (PSN / CityHome).
    fav_count = sum(1 for e in estates if e.get("source") in ("psn", "cityhome"))
    if fav_count > 0:
        legend_parts.append(
            f'<div style="display:flex;align-items:center;gap:6px;margin:8px 0 3px 0;'
            f'padding-top:6px;border-top:1px solid #eee;">'
            f'<svg width="14" height="14" viewBox="0 0 24 24">'
            f'<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
            f'C2 5.42 4.42 3 7.5 3c1.74 0 3.41.81 4.5 2.09C13.09 3.81 '
            f'14.76 3 16.5 3 19.58 3 22 5.42 22 8.5c0 3.78-3.4 6.86-8.55 '
            f'11.54L12 21.35z" fill="#D32F2F"/></svg>'
            f'<span>PSN / CityHome ({fav_count})</span></div>'
        )
    legend_items = "".join(legend_parts)

    # Price statistics over estates with a positive price.
    prices = [e["price"] for e in estates if e["price"] > 0]
    min_price = format_price(min(prices)) if prices else "N/A"
    max_price = format_price(max(prices)) if prices else "N/A"
    avg_price = format_price(int(sum(prices) / len(prices))) if prices else "N/A"

    # Literal braces of CSS/JS are doubled ({{ }}) because this is an f-string;
    # regex backslashes are doubled because it is not a raw string.
    page = f"""<!DOCTYPE html>
<html lang="cs">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Byty v Praze — mapa ({len(estates)} bytů)</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<style>
    * {{ margin: 0; padding: 0; box-sizing: border-box; }}
    body {{ font-family: system-ui, -apple-system, sans-serif; }}
    #map {{ width: 100%; height: 100vh; }}
    .heart-icon {{ background: none !important; border: none !important; }}
    .rate-btn:hover {{ background: #f0f0f0 !important; }}
    .rate-btn.active-fav {{ background: #FFF9C4 !important; border-color: #FFC107 !important; }}
    .rate-btn.active-rej {{ background: #FFEBEE !important; border-color: #F44336 !important; }}
    @keyframes pulse-glow {{
        0% {{ box-shadow: 0 0 4px 2px rgba(255, 193, 7, 0.4); }}
        50% {{ box-shadow: 0 0 10px 5px rgba(255, 193, 7, 0.7); }}
        100% {{ box-shadow: 0 0 4px 2px rgba(255, 193, 7, 0.4); }}
    }}
    .marker-favorite {{ animation: pulse-glow 2s ease-in-out infinite; border-radius: 50%; }}
    .heart-icon-fav svg path {{ stroke: gold !important; stroke-width: 2.5 !important; filter: drop-shadow(0 0 4px rgba(255,193,7,0.7)); }}
    .heart-icon-rej {{ opacity: 0.2 !important; }}
    .info-panel {{
        position: absolute; top: 10px; right: 10px; z-index: 1000;
        background: white; padding: 16px; border-radius: 10px;
        box-shadow: 0 2px 12px rgba(0,0,0,0.15); max-width: 260px;
        font-size: 13px; line-height: 1.5;
    }}
    .info-panel h2 {{ font-size: 16px; margin-bottom: 8px; }}
    .info-panel .stats {{ color: #666; margin-bottom: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee; }}
    .filter-section {{ margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; }}
    .filter-section label {{ display: flex; align-items: center; gap: 6px; margin: 3px 0; cursor: pointer; }}
    .filter-section input[type="checkbox"] {{ accent-color: #1976D2; }}
    #floor-filter {{ margin-top: 8px; }}
    #floor-filter select {{ width: 100%; padding: 4px; border-radius: 4px; border: 1px solid #ccc; }}
</style>
</head>
<body>
<div id="map"></div>
<div class="info-panel">
    <h2>Byty v Praze</h2>
    <div class="stats">
        <div>Celkem: <b id="visible-count">{len(estates)}</b> bytů</div>
        <div>Cena: {min_price} — {max_price}</div>
        <div>Průměr: {avg_price}</div>
    </div>
    <div><b>Dispozice:</b></div>
    {legend_items}
    <div class="filter-section">
        <b>Filtry:</b>
        <div id="floor-filter">
            <label>Patro od:
                <select id="min-floor" onchange="applyFilters()">
                    <option value="2">2. NP (vše)</option>
                    <option value="3">3. NP+</option>
                    <option value="4">4. NP+</option>
                    <option value="5">5. NP+</option>
                </select>
            </label>
        </div>
        <div style="margin-top:6px;">
            <label>Max cena:
                <select id="max-price" onchange="applyFilters()">
                    <option value="13500000">13 500 000 Kč</option>
                    <option value="12000000">12 000 000 Kč</option>
                    <option value="10000000">10 000 000 Kč</option>
                    <option value="8000000">8 000 000 Kč</option>
                </select>
            </label>
        </div>
    </div>
    <div class="filter-section">
        <div id="rating-counts" style="margin-bottom:6px;font-size:12px;color:#666;">
            ⭐ 0 oblíbených, 🚫 0 zamítnutých
        </div>
        <label>
            <input type="checkbox" id="hide-rejected" onchange="applyFilters()">
            Skrýt zamítnuté
        </label>
    </div>
</div>

<script>
// Prague center
var map = L.map('map').setView([50.075, 14.437], 12);

L.tileLayer('https://{{s}}.basemaps.cartocdn.com/light_nolabels/{{z}}/{{x}}/{{y}}{{r}}.png', {{
    attribution: '© OpenStreetMap contributors © CARTO',
    maxZoom: 19,
    subdomains: 'abcd',
}}).addTo(map);

// Labels as separate layer on top (so markers sit between background and labels)
L.tileLayer('https://{{s}}.basemaps.cartocdn.com/light_only_labels/{{z}}/{{x}}/{{y}}{{r}}.png', {{
    maxZoom: 19,
    subdomains: 'abcd',
    pane: 'shadowPane',
}}).addTo(map);

var allMarkers = [];

function addMarker(lat, lon, color, popup, hashId) {{
    var marker = L.circleMarker([lat, lon], {{
        radius: 8,
        fillColor: color,
        color: '#fff',
        weight: 2,
        opacity: 1,
        fillOpacity: 0.85,
    }}).bindPopup(popup);
    marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId }};
    allMarkers.push(marker);
    marker.addTo(map);
}}

function heartIcon(color) {{
    var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">'
        + '<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
        + 'C2 5.42 4.42 3 7.5 3c1.74 0 3.41.81 4.5 2.09C13.09 3.81 '
        + '14.76 3 16.5 3 19.58 3 22 5.42 22 8.5c0 3.78-3.4 6.86-8.55 '
        + '11.54L12 21.35z" fill="' + color + '" stroke="white" stroke-width="1.5"/></svg>';
    return L.divIcon({{
        html: svg,
        className: 'heart-icon',
        iconSize: [24, 24],
        iconAnchor: [12, 22],
        popupAnchor: [0, -18],
    }});
}}

function addHeartMarker(lat, lon, color, popup, hashId) {{
    var marker = L.marker([lat, lon], {{
        icon: heartIcon(color),
    }}).bindPopup(popup);
    marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true }};
    allMarkers.push(marker);
    marker.addTo(map);
}}

{markers_js}

// ── Rating system ──────────────────────────────────────────────
var RATINGS_KEY = 'byty_ratings';

function loadRatings() {{
    try {{
        var data = localStorage.getItem(RATINGS_KEY);
        return data ? JSON.parse(data) : {{}};
    }} catch(e) {{ return {{}}; }}
}}

function saveRatings(ratings) {{
    localStorage.setItem(RATINGS_KEY, JSON.stringify(ratings));
}}

function applyMarkerStyle(marker, status) {{
    if (marker._data.isHeart) {{
        var el = marker._icon;
        if (!el) return;
        el.classList.remove('heart-icon-fav', 'heart-icon-rej');
        el.style.transform = el.style.transform.replace(/scale\\([^)]*\\)/, '');
        if (status === 'fav') {{
            el.classList.add('heart-icon-fav');
            el.style.transform += ' scale(1.3)';
        }} else if (status === 'reject') {{
            el.classList.add('heart-icon-rej');
        }}
    }} else {{
        if (status === 'fav') {{
            marker.setStyle({{
                radius: 12, fillOpacity: 1, weight: 3,
                fillColor: marker._data.color, color: '#fff',
            }});
            if (marker._path) marker._path.classList.add('marker-favorite');
        }} else if (status === 'reject') {{
            marker.setStyle({{
                radius: 6, fillOpacity: 0.15, fillColor: '#999', color: '#bbb', weight: 1,
            }});
            if (marker._path) marker._path.classList.remove('marker-favorite');
        }} else {{
            marker.setStyle({{
                radius: 8, fillColor: marker._data.color, color: '#fff',
                weight: 2, fillOpacity: 0.85,
            }});
            if (marker._path) marker._path.classList.remove('marker-favorite');
        }}
    }}
}}

function rateMarker(marker, action) {{
    var hashId = marker._data.hashId;
    var ratings = loadRatings();
    var current = ratings[hashId];
    if (current && current.status === action) {{
        delete ratings[hashId];
        saveRatings(ratings);
        applyMarkerStyle(marker, null);
        updateRatingCounts();
        applyFilters();
        return null;
    }} else {{
        var note = (current && current.note) || '';
        ratings[hashId] = {{ status: action, note: note }};
        saveRatings(ratings);
        applyMarkerStyle(marker, action);
        updateRatingCounts();
        applyFilters();
        return action;
    }}
}}

function setNote(hashId, note) {{
    var ratings = loadRatings();
    if (ratings[hashId]) {{
        ratings[hashId].note = note;
        saveRatings(ratings);
    }}
}}

function updateRatingCounts() {{
    var ratings = loadRatings();
    var favCount = 0, rejCount = 0;
    for (var id in ratings) {{
        if (ratings[id].status === 'fav') favCount++;
        if (ratings[id].status === 'reject') rejCount++;
    }}
    var el = document.getElementById('rating-counts');
    if (el) el.innerHTML = '⭐ ' + favCount + ' oblíbených, 🚫 ' + rejCount + ' zamítnutých';
}}

function restoreRatings() {{
    var ratings = loadRatings();
    allMarkers.forEach(function(m) {{
        var r = ratings[m._data.hashId];
        if (r) applyMarkerStyle(m, r.status);
    }});
    updateRatingCounts();
}}

// ── Popup rating handler ───────────────────────────────────────
map.on('popupopen', function(e) {{
    var container = e.popup.getElement();
    if (!container) return;
    var wrapper = container.querySelector('[data-hashid]');
    if (!wrapper) return;
    var hashId = wrapper.getAttribute('data-hashid');

    var marker = null;
    for (var i = 0; i < allMarkers.length; i++) {{
        if (String(allMarkers[i]._data.hashId) === String(hashId)) {{
            marker = allMarkers[i]; break;
        }}
    }}
    if (!marker) return;

    var ratings = loadRatings();
    var current = ratings[hashId];
    var favBtn = container.querySelector('.fav-btn');
    var rejBtn = container.querySelector('.rej-btn');
    var statusEl = container.querySelector('.rating-status');
    var noteEl = container.querySelector('.rating-note');

    // Restore state in popup
    favBtn.classList.remove('active-fav');
    rejBtn.classList.remove('active-rej');
    statusEl.textContent = '';
    noteEl.style.display = 'none';
    noteEl.value = '';

    if (current) {{
        if (current.status === 'fav') {{
            favBtn.classList.add('active-fav');
            statusEl.textContent = 'Oblíbený';
            statusEl.style.color = '#F9A825';
        }} else if (current.status === 'reject') {{
            rejBtn.classList.add('active-rej');
            statusEl.textContent = 'Zamítnutý';
            statusEl.style.color = '#E53935';
        }}
        noteEl.style.display = 'block';
        noteEl.value = current.note || '';
    }}

    favBtn.onclick = function() {{
        var result = rateMarker(marker, 'fav');
        favBtn.classList.remove('active-fav');
        rejBtn.classList.remove('active-rej');
        if (result === 'fav') {{
            favBtn.classList.add('active-fav');
            statusEl.textContent = 'Oblíbený';
            statusEl.style.color = '#F9A825';
            noteEl.style.display = 'block';
        }} else {{
            statusEl.textContent = '';
            noteEl.style.display = 'none';
            noteEl.value = '';
        }}
    }};

    rejBtn.onclick = function() {{
        var result = rateMarker(marker, 'reject');
        favBtn.classList.remove('active-fav');
        rejBtn.classList.remove('active-rej');
        if (result === 'reject') {{
            rejBtn.classList.add('active-rej');
            statusEl.textContent = 'Zamítnutý';
            statusEl.style.color = '#E53935';
            noteEl.style.display = 'block';
        }} else {{
            statusEl.textContent = '';
            noteEl.style.display = 'none';
            noteEl.value = '';
        }}
    }};

    var noteTimer = null;
    noteEl.oninput = function() {{
        clearTimeout(noteTimer);
        noteTimer = setTimeout(function() {{
            setNote(hashId, noteEl.value);
        }}, 500);
    }};
}});

// ── Filters ────────────────────────────────────────────────────
function applyFilters() {{
    var minFloor = parseInt(document.getElementById('min-floor').value);
    var maxPrice = parseInt(document.getElementById('max-price').value);
    var hideRejected = document.getElementById('hide-rejected').checked;
    var ratings = loadRatings();
    var visible = 0;

    allMarkers.forEach(function(m) {{
        var popup = m.getPopup().getContent();
        var floorMatch = popup.match(/(\\d+)\\. NP/);
        var priceMatch = popup.match(/([\\d\\s]+)\\sKč/);

        var floor = floorMatch ? parseInt(floorMatch[1]) : null;
        var price = priceMatch ? parseInt(priceMatch[1].replace(/\\s/g, '')) : 0;

        var show = true;
        if (floor !== null && floor < minFloor) show = false;
        if (price > maxPrice) show = false;

        var r = ratings[m._data.hashId];
        if (hideRejected && r && r.status === 'reject') show = false;

        if (show) {{
            if (!map.hasLayer(m)) m.addTo(map);
            visible++;
        }} else {{
            if (map.hasLayer(m)) map.removeLayer(m);
        }}
    }});

    // Re-apply styles for markers that were just added to map
    allMarkers.forEach(function(m) {{
        if (map.hasLayer(m)) {{
            var r = ratings[m._data.hashId];
            if (r) applyMarkerStyle(m, r.status);
        }}
    }});

    document.getElementById('visible-count').textContent = visible;
}}

// Initialize ratings on load
restoreRatings();

</script>
</body>
</html>"""

    path = Path(output_path)
    path.write_text(page, encoding="utf-8")
    logger.info(f"\n✓ Mapa uložena: {path.resolve()}")
    return str(path.resolve())
|
|
|
|
|
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse limits and log level, run the scrape,
    # then persist the JSON (which doubles as the next run's cache) and the map.
    cli = argparse.ArgumentParser(description="Scrape apartments from Sreality.cz")
    cli.add_argument("--max-pages", type=int, help="Maximum number of pages to scrape")
    cli.add_argument("--max-properties", type=int, help="Maximum number of properties to fetch details for")
    cli.add_argument(
        "--log-level",
        type=str,
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Logging level (default: INFO)",
    )
    options = cli.parse_args()

    # Configure logging
    logging.basicConfig(
        level=getattr(logging, options.log_level),
        format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
        handlers=[logging.StreamHandler()],
    )

    started_at = time.time()
    estates = scrape(max_pages=options.max_pages, max_properties=options.max_properties)

    if not estates:
        logger.info("\nŽádné byty neodpovídají kritériím :(")
    else:
        # Save raw data as a JSON backup.
        json_path = Path("byty_sreality.json")
        serialized = json.dumps(estates, ensure_ascii=False, indent=2)
        json_path.write_text(serialized, encoding="utf-8")
        logger.info(f"✓ Data uložena: {json_path.resolve()}")

        # Generate the map and report timing.
        map_path = generate_map(estates)
        elapsed = time.time() - started_at
        logger.info(f"\n⏱ Celkový čas: {elapsed:.0f} s")
        logger.info(f"\nOtevři v prohlížeči:\n file://{map_path}")
|