Files
maru-hleda-byt/scrape_and_map.py
Marie Michalova b8d4d44164 Rewrite PSN + CityHome scrapers, add price/m² map coloring, ratings system, and status dashboard
- Rewrite PSN scraper to use /api/units-list endpoint (single API call, no HTML parsing)
- Fix CityHome scraper: GPS from multiple URL patterns, address from table cells, no 404 retries
- Color map markers by price/m² instead of disposition (blue→green→orange→red scale)
- Add persistent rating system (favorite/reject) with Flask ratings server and localStorage fallback
- Rejected markers show original color at reduced opacity with 🚫 SVG overlay
- Favorite markers shown as star icons with gold pulse animation
- Add "new today" marker logic (scraped_at == today) with larger pulsing green outline
- Add filter panel with floor, price, hide-rejected controls and ☰/✕ toggle buttons
- Add generate_status.py for scraper run statistics and status.html dashboard
- Add scraped_at field to all scrapers for freshness tracking
- Update run_all.sh with log capture and status generation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 15:15:25 +01:00

1111 lines
42 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Sreality scraper + interactive map generator.
Hledá byty na prodej v Praze podle zadaných kritérií a generuje HTML mapu.
"""
from __future__ import annotations
import argparse
import json
import logging
import math
import time
import urllib.request
import urllib.parse
from datetime import datetime
from pathlib import Path
logger = logging.getLogger(__name__)
# ── Filter configuration ────────────────────────────────────────────────────
# Search criteria used when building listing queries and when filtering results.
MAX_PRICE = 13_500_000 # CZK; upper bound of the price range sent to the listing API
MIN_AREA = 69 # m² — exclude flats smaller than this
DISPOSITIONS = [6, 7, 8, 9, 10, 11, 12] # 3+kk, 3+1, 4+kk, 4+1, 5+kk, 5+1, 6+
MIN_FLOOR = 2 # fetch from the 2nd floor (2. NP) up; the 2nd floor is flagged separately on the map
REGION_ID = 10 # Prague
PER_PAGE = 60 # page size requested from the listing API; also used to compute the page count
# Sreality API base
API_BASE = "https://www.sreality.cz/api/cs/v2/estates"
DETAIL_API = "https://www.sreality.cz/api/cs/v2/estates/{}" # "{}" is filled with the listing hash_id
# Keywords for excluding panel buildings / housing estates
# NOTE(review): neither set is referenced in the part of the file reviewed here — confirm they are still used.
PANEL_KEYWORDS = {"panel", "panelový", "panelový dům", "panelák"}
SIDLISTE_KEYWORDS = {"sídliště", "sidliste"}
# HTTP headers sent with every API request
HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Accept": "application/json",
}
def api_get(url: str) -> dict:
    """Fetch a URL with the shared request headers and return the parsed JSON body.

    Args:
        url: Absolute URL to request.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: On any transport failure. ``urllib.error.URLError`` and
            ``ConnectionError`` are both ``OSError`` subclasses, so a single
            handler covers them. The failure is logged with a traceback and
            re-raised unchanged.
        ValueError: If the body is not valid UTF-8 or not valid JSON
            (propagated as-is, not logged here).
    """
    logger.debug(f"HTTP GET request: {url}")
    logger.debug(f"Headers: {HEADERS}")
    request = urllib.request.Request(url, headers=HEADERS)
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            raw = resp.read()
            status = resp.status
    except OSError as exc:
        logger.error(f"HTTP request failed for {url}: {exc}", exc_info=True)
        raise
    body = raw.decode("utf-8")
    # Report the size of the raw payload: the log line says "bytes", and the
    # decoded text length differs for any non-ASCII content.
    logger.debug(f"HTTP response: status={status}, size={len(raw)} bytes")
    logger.debug(f"Response preview: {body[:200]}")
    return json.loads(body)
def build_list_url(disposition: int, page: int = 1) -> str:
    """Return the listing-search URL for one disposition and result page.

    Args:
        disposition: Value sent as ``category_sub_cb``.
        page: 1-based result page number.
    """
    query = urllib.parse.urlencode(
        [
            ("category_main_cb", 1),  # flats
            ("category_type_cb", 1),  # for sale
            ("category_sub_cb", disposition),
            ("locality_region_id", REGION_ID),
            ("czk_price_summary_order2", f"0|{MAX_PRICE}"),
            ("floor_number", f"{MIN_FLOOR}|99"),
            ("per_page", PER_PAGE),
            ("page", page),
        ]
    )
    return f"{API_BASE}?{query}"
def fetch_estates_for_disposition(disposition: int, max_pages: int | None = None) -> list[dict]:
    """Fetch all listings for one disposition, following pagination.

    The first page is always requested; its ``result_size`` determines how
    many further pages are fetched.

    Args:
        disposition: Disposition code passed to ``build_list_url``.
        max_pages: Optional cap on the number of pages fetched. ``None``
            means no cap.

    Returns:
        The concatenated ``_embedded.estates`` entries of every fetched page.
    """
    logger.info(f"Fetching disposition {disposition}, page 1 ...")
    data = api_get(build_list_url(disposition, page=1))
    total = data.get("result_size", 0)
    estates = data.get("_embedded", {}).get("estates", [])
    total_pages = math.ceil(total / PER_PAGE) if total > 0 else 0
    logger.info(f"{total} results, {total_pages} pages")

    # Only report the cap when it actually truncates the run; previously the
    # message was emitted whenever max_pages was set, even if nothing was cut.
    if max_pages is not None and max_pages < total_pages:
        logger.debug(f"Max pages limit reached: limiting {total_pages} pages to {max_pages}")
        total_pages = max_pages

    for page in range(2, total_pages + 1):
        time.sleep(0.5)  # pause between consecutive page requests
        logger.info(f"Fetching page {page}/{total_pages} ...")
        data = api_get(build_list_url(disposition, page=page))
        estates.extend(data.get("_embedded", {}).get("estates", []))
    return estates
def get_estate_detail(hash_id: int) -> dict | None:
    """Fetch the detail record of one listing; return ``None`` on any failure.

    Failures are logged as warnings (with traceback) rather than raised, so a
    single bad listing does not abort the caller.
    """
    payload = None
    try:
        detail_url = DETAIL_API.format(hash_id)
        logger.debug(f"Fetching detail for hash_id={hash_id}")
        fetched = api_get(detail_url)
        top_keys = list(fetched.keys())[:5]
        logger.debug(f"Detail fetched for hash_id={hash_id}, keys: {top_keys}")
        payload = fetched
    except Exception as exc:
        logger.warning(f"Could not fetch detail for hash_id={hash_id}: {exc}", exc_info=True)
    return payload
def parse_floor_from_detail(detail: dict) -> int | None:
    """Extract the floor number from a listing detail's ``items``.

    Looks for an item named "Podlaží" (or the unaccented "Podlazi") and parses
    the integer before the first "." of its value, e.g.
    "3. podlaží z celkem 5 ..." -> 3.

    Args:
        detail: Listing detail dict; ``items`` is expected to be a list of
            dicts with ``name`` and ``value`` keys.

    Returns:
        The floor number, or ``None`` when no floor item exists or none of
        them starts with an integer.
    """
    for item in detail.get("items") or []:
        if item.get("name") not in ("Podlaží", "Podlazi"):
            continue
        raw = item.get("value")
        if raw is None:
            continue
        # Coerce to text first: a numeric value would otherwise crash on .split().
        leading = str(raw).split(".", 1)[0].strip()
        try:
            return int(leading)
        except ValueError:
            # Not a number (e.g. a textual floor name); try any later floor item.
            continue
    return None
def is_panel_or_sidliste(detail: dict) -> tuple[bool, str]:
    """Check whether a listing is panel construction or located on a sídliště.

    Only the structured ``items`` of the detail are inspected: the
    "Stavba"/"Konstrukce" item for the substring "panel", and the
    "Umístění objektu" item for "sídliště"/"sidliste". The free-text
    description is not examined.

    Args:
        detail: Listing detail dict with an ``items`` list of
            ``{"name": ..., "value": ...}`` dicts.

    Returns:
        ``(should_exclude, reason)`` where ``reason`` joins every matched
        rule with "; " and is empty when nothing matched.
    """
    reasons = []
    for item in detail.get("items") or []:
        name = (item.get("name") or "").lower()
        value = str(item.get("value") or "").lower()
        # Building-type item mentioning panel construction
        if name in ("stavba", "konstrukce") and "panel" in value:
            reasons.append(f"stavba: {value}")
        # Location item mentioning a housing estate (with or without diacritics)
        if name in ("umístění objektu", "umisteni objektu") and (
            "sídliště" in value or "sidliste" in value
        ):
            reasons.append(f"umístění: {value}")
    return (bool(reasons), "; ".join(reasons))
def disposition_label(sub_cb: int) -> str:
    """Return the display label for a disposition code, or "?" if unknown."""
    codes = (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16)
    names = (
        "1+kk", "1+1", "2+kk", "2+1",
        "3+kk", "3+1", "4+kk", "4+1",
        "5+kk", "5+1", "6+", "Atypický",
    )
    lookup = dict(zip(codes, names))
    if sub_cb in lookup:
        return lookup[sub_cb]
    return "?"
def disposition_url_slug(sub_cb: int) -> str:
    """Return the URL path segment for a disposition code ("byt" if unknown)."""
    slug_by_code = dict(
        [
            (2, "1+kk"), (3, "1+1"), (4, "2+kk"), (5, "2+1"),
            (6, "3+kk"), (7, "3+1"), (8, "4+kk"), (9, "4+1"),
            (10, "5+kk"), (11, "5+1"), (12, "6-a-vice"), (16, "atypicky"),
        ]
    )
    try:
        return slug_by_code[sub_cb]
    except KeyError:
        return "byt"
def sreality_url(hash_id: int, seo: dict) -> str:
    """Build the human-readable Sreality detail URL for a listing.

    Args:
        hash_id: Listing identifier, used as the last path segment.
        seo: Dict that may carry ``category_type_cb``, ``category_main_cb``,
            ``category_sub_cb`` and ``locality``; missing or unknown values
            fall back to "prodej", "byt", the default disposition slug and
            "praha" respectively.
    """
    type_slugs = {1: "prodej", 2: "pronajem"}
    main_slugs = {1: "byt", 2: "dum", 3: "pozemek", 4: "komercni"}

    type_slug = type_slugs.get(seo.get("category_type_cb", 1), "prodej")
    main_slug = main_slugs.get(seo.get("category_main_cb", 1), "byt")
    sub_slug = disposition_url_slug(seo.get("category_sub_cb", 0))
    locality_slug = seo.get("locality", "praha")

    path = f"{type_slug}/{main_slug}/{sub_slug}/{locality_slug}/{hash_id}"
    return "https://www.sreality.cz/detail/" + path
def format_price(price: int) -> str:
    """Format a price with spaces as thousands separators.

    Example: ``13500000`` -> ``"13 500 000"``. No currency suffix is added.

    Args:
        price: Amount to format.

    Returns:
        The grouped number. The sign of a negative number stays attached to
        the first group. A value that does not support numeric grouping is
        returned as ``str(price)``.
    """
    try:
        # Let the format mini-language do the grouping, then swap the separator.
        return f"{price:,}".replace(",", " ")
    except (TypeError, ValueError):
        # Best effort for non-numeric input rather than failing a whole run.
        return str(price)
# ── Hlavní scraping ─────────────────────────────────────────────────────────
def load_cache(json_path: str = "byty_sreality.json") -> dict[int, dict]:
    """Load previously scraped listings as a cache keyed by ``hash_id``.

    The cache is best-effort: a missing, unreadable, malformed or unexpectedly
    shaped file yields an empty cache instead of an error.

    Args:
        json_path: Path of the JSON file written by an earlier run; expected
            to contain a list of listing dicts.

    Returns:
        Mapping of ``hash_id`` to the stored listing dict. Entries that are
        not dicts or lack ``hash_id`` are skipped.
    """
    try:
        data = json.loads(Path(json_path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: invalid JSON (json.JSONDecodeError) or invalid UTF-8.
        return {}
    if not isinstance(data, list):
        # A top-level object or scalar is not a listing array; ignore it.
        return {}
    return {
        entry["hash_id"]: entry
        for entry in data
        if isinstance(entry, dict) and "hash_id" in entry
    }
def _find_item_value(detail: dict, names: tuple[str, ...]):
    """Return the value of the first detail item whose name is in *names*, else None."""
    for item in detail.get("items", []):
        if item.get("name") in names:
            return item.get("value")
    return None


def _parse_area(detail: dict) -> int | None:
    """Return the usable area from the detail items, or None if absent or unparseable."""
    for item in detail.get("items", []):
        name = item.get("name") or ""
        # The field name can be truncated ("Užitná ploch" or "Užitná plocha"),
        # so match on a fragment instead of the full label.
        if "žitná ploch" in name or "zitna ploch" in name.lower():
            try:
                return int(item["value"])
            except (ValueError, TypeError, KeyError):
                # Only the first matching item is considered.
                return None
    return None


def _first_image_href(estate: dict) -> str:
    """Return the href of the listing's first image link, or "" when it has none."""
    images = estate.get("_links", {}).get("images")
    if not images:
        return ""
    return images[0].get("href", "")


def scrape(max_pages: int | None = None, max_properties: int | None = None):
    """Run the full scrape and return the listings that pass all filters.

    Steps: fetch listing pages for every configured disposition, de-duplicate
    by ``hash_id``, then for each listing either reuse the cached record (same
    ``hash_id`` and unchanged price) or fetch its detail and apply the
    panel/sídliště and minimum-area filters.

    Args:
        max_pages: Optional page cap per disposition, forwarded to
            ``fetch_estates_for_disposition``.
        max_properties: Optional cap on the number of newly detailed listings
            that are accepted; cache hits do not count toward it.

    Returns:
        List of result dicts (cached records are returned as stored).
    """
    cache = load_cache()
    rule = "=" * 60

    logger.info(rule)
    logger.info("Stahuji inzeráty ze Sreality.cz")
    logger.info(f"Cena: do {format_price(MAX_PRICE)}")
    logger.info(f"Dispozice: {', '.join(disposition_label(d) for d in DISPOSITIONS)}")
    logger.info(f"Patro: od {MIN_FLOOR}. NP")
    logger.info("Region: Praha")
    if cache:
        logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
    if max_pages:
        logger.info(f"Limit stran: {max_pages}")
    if max_properties:
        logger.info(f"Limit majetků: {max_properties}")
    logger.info(rule)

    # Collect raw listings, remembering which disposition query returned each one.
    all_estates_raw = []
    for disp in DISPOSITIONS:
        logger.info(f"\n▸ Dispozice: {disposition_label(disp)}")
        estates = fetch_estates_for_disposition(disp, max_pages=max_pages)
        for e in estates:
            e["_disposition_cb"] = disp
        all_estates_raw.extend(estates)
        time.sleep(0.5)

    # Deduplicate by hash_id, keeping the first occurrence.
    seen = set()
    unique_estates = []
    for e in all_estates_raw:
        hid = e.get("hash_id")
        if hid and hid not in seen:
            seen.add(hid)
            unique_estates.append(e)
    total_unique = len(unique_estates)

    logger.info(f"\n{rule}")
    logger.info(f"Staženo celkem: {total_unique} unikátních inzerátů")
    logger.info("Stahuji detaily pro filtrování panelu/sídlišť...")
    logger.info(rule)

    # Fetch details and filter
    results = []
    excluded_panel = 0
    excluded_no_gps = 0
    excluded_no_detail = 0
    excluded_small = 0
    cache_hits = 0
    details_fetched = 0
    today = datetime.now().strftime("%Y-%m-%d")  # one stamp for the whole run

    for i, estate in enumerate(unique_estates):
        # Stop if max_properties reached
        if max_properties is not None and details_fetched >= max_properties:
            logger.debug(f"Max properties limit reached: {max_properties}")
            break

        # Progress is reported at the top of the iteration so that skipped
        # listings (cache hits, exclusions) cannot bypass it via `continue`.
        if i and i % 20 == 0:
            logger.info(f"Zpracováno {i}/{total_unique} ...")

        hash_id = estate.get("hash_id")
        gps = estate.get("gps", {})
        if not gps or not gps.get("lat") or not gps.get("lon"):
            excluded_no_gps += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (no GPS)")
            continue

        price = estate.get("price", 0)

        # Check cache — if hash_id exists and price unchanged, reuse
        cached = cache.get(hash_id)
        if cached and cached.get("price") == price:
            cache_hits += 1
            logger.debug(f"Cache hit for hash_id={hash_id}")
            results.append(cached)
            continue

        # Fetch detail
        time.sleep(0.3)
        detail = get_estate_detail(hash_id)
        if not detail:
            excluded_no_detail += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (no detail)")
            continue

        # Check panel / sídliště
        is_excluded, reason = is_panel_or_sidliste(detail)
        if is_excluded:
            excluded_panel += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (panel/sídliště): {reason}")
            logger.info(f"✗ Vyloučen #{hash_id}: {reason}")
            continue

        floor = parse_floor_from_detail(detail)
        area = _parse_area(detail)

        # Filter by minimum area (listings with unknown area are kept)
        if area is not None and area < MIN_AREA:
            excluded_small += 1
            logger.debug(f"Filter: hash_id={hash_id} - excluded (area {area} m² < {MIN_AREA} m²)")
            logger.info(f"✗ Vyloučen #{hash_id}: malá plocha ({area} m²)")
            continue

        building_type = _find_item_value(detail, ("Stavba", "Konstrukce"))
        ownership = _find_item_value(detail, ("Vlastnictví", "Vlastnictvi"))

        seo = estate.get("seo", {})
        disp_cb = estate.get("_disposition_cb") or seo.get("category_sub_cb")

        results.append(
            {
                "hash_id": hash_id,
                "name": estate.get("name", ""),
                "price": price,
                "price_formatted": format_price(price),
                "locality": estate.get("locality", ""),
                "lat": gps["lat"],
                "lon": gps["lon"],
                "disposition": disposition_label(disp_cb),
                "floor": floor,
                "area": area,
                "building_type": building_type,
                "ownership": ownership,
                "url": sreality_url(hash_id, seo),
                "image": _first_image_href(estate),
                "scraped_at": today,
            }
        )
        details_fetched += 1

    logger.info(f"\n{rule}")
    logger.info("Výsledky:")
    logger.info(f" Celkem staženo: {total_unique}")
    logger.info(f" Z cache (přeskočeno): {cache_hits}")
    logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
    logger.info(f" Vyloučeno (<{MIN_AREA} m²): {excluded_small}")
    logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
    logger.info(f" Vyloučeno (bez detailu): {excluded_no_detail}")
    logger.info(f" ✓ Vyhovující byty: {len(results)}")
    logger.info(rule)
    return results
# ── Generování HTML mapy ────────────────────────────────────────────────────
def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
"""Generate an interactive Leaflet.js HTML map."""
# Color by price per m² — cool blue→warm red scale, no yellow
# Thresholds based on Prague market distribution (p25=120k, p50=144k, p75=162k)
price_color_scale = [
(110_000, "#1565C0"), # < 110k/m² → deep blue (levné)
(130_000, "#42A5F5"), # 110130k → light blue
(150_000, "#66BB6A"), # 130150k → green (střed)
(165_000, "#EF6C00"), # 150165k → dark orange
(float("inf"), "#C62828"), # > 165k → dark red (drahé)
]
def price_color(estate: dict) -> str:
price = estate.get("price") or 0
area = estate.get("area") or 0
if not area:
return "#9E9E9E"
ppm2 = price / area
for threshold, color in price_color_scale:
if ppm2 < threshold:
return color
return "#E53935"
# Legend bands for info panel (built once)
price_legend_items = (
'<div style="margin-bottom:4px;font-size:12px;color:#555;font-weight:600;">Cena / m²:</div>'
)
bands = [
("#1565C0", "< 110 000 Kč/m²"),
("#42A5F5", "110 130 000 Kč/m²"),
("#66BB6A", "130 150 000 Kč/m²"),
("#EF6C00", "150 165 000 Kč/m²"),
("#C62828", "> 165 000 Kč/m²"),
("#9E9E9E", "cena/plocha neuvedena"),
]
for bcolor, blabel in bands:
price_legend_items += (
f'<div style="display:flex;align-items:center;gap:6px;margin:2px 0;">'
f'<span style="width:14px;height:14px;border-radius:50%;background:{bcolor};'
f'display:inline-block;border:2px solid white;box-shadow:0 1px 3px rgba(0,0,0,0.3);flex-shrink:0;"></span>'
f'<span>{blabel}</span></div>'
)
# New marker indicator — bigger dot, no extra border
price_legend_items += (
'<div style="display:flex;align-items:center;gap:6px;margin:6px 0 0 0;'
'padding-top:6px;border-top:1px solid #eee;">'
'<span style="width:18px;height:18px;border-radius:50%;background:#66BB6A;'
'display:inline-block;box-shadow:0 1px 4px rgba(0,0,0,0.35);flex-shrink:0;"></span>'
'<span>Nové (z dnešního scrapu) — větší</span></div>'
)
markers_js = ""
for e in estates:
color = price_color(e)
floor_text = f'{e["floor"]}. NP' if e["floor"] else "neuvedeno"
area_text = f'{e["area"]}' if e["area"] else "neuvedeno"
building_text = e["building_type"] or "neuvedeno"
ownership_text = e["ownership"] or "neuvedeno"
# Floor warning for 2nd floor
floor_note = ""
if e["floor"] == 2:
floor_note = '<br><span style="color:#FF9800;font-weight:bold;">⚠ 2. NP — zvážit klidnost lokality</span>'
source = e.get("source", "sreality")
source_labels = {"sreality": "Sreality", "realingo": "Realingo", "bezrealitky": "Bezrealitky", "idnes": "iDNES", "psn": "PSN", "cityhome": "CityHome"}
source_colors = {"sreality": "#1976D2", "realingo": "#00897B", "bezrealitky": "#E91E63", "idnes": "#FF6F00", "psn": "#D32F2F", "cityhome": "#D32F2F"}
source_label = source_labels.get(source, source)
source_color = source_colors.get(source, "#999")
hash_id = e.get("hash_id", "")
scraped_at = e.get("scraped_at", "")
is_new = scraped_at == datetime.now().strftime("%Y-%m-%d")
new_badge = (
'<span style="margin-left:6px;font-size:11px;background:#FFD600;color:#333;'
'padding:1px 6px;border-radius:3px;font-weight:bold;">NOVÉ</span>'
if is_new else ""
)
popup = (
f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}">'
f'<b style="font-size:14px;">{format_price(e["price"])}</b>'
f'<span style="margin-left:8px;font-size:11px;background:{source_color};color:white;'
f'padding:1px 6px;border-radius:3px;">{source_label}</span>{new_badge}<br>'
f'<span style="color:#666;">{e["disposition"]} | {area_text} | {floor_text}</span>'
f'{floor_note}<br><br>'
f'<b>{e["locality"]}</b><br>'
f'Stavba: {building_text}<br>'
f'Vlastnictví: {ownership_text}<br><br>'
f'<a href="{e["url"]}" target="_blank" '
f'style="color:{source_color};text-decoration:none;font-weight:bold;">'
f'→ Otevřít na {source_label}</a>'
f'<div style="margin-top:10px;padding-top:8px;border-top:1px solid #eee;">'
f'<div style="display:flex;gap:6px;align-items:center;">'
f'<button class="rate-btn fav-btn" data-action="fav" '
f'style="padding:4px 12px;border:1px solid #ccc;border-radius:4px;'
f'cursor:pointer;font-size:16px;background:#fff;">⭐</button>'
f'<button class="rate-btn rej-btn" data-action="reject" '
f'style="padding:4px 12px;border:1px solid #ccc;border-radius:4px;'
f'cursor:pointer;font-size:16px;background:#fff;">🚫</button>'
f'<span class="rating-status" style="margin-left:6px;font-size:12px;color:#999;"></span>'
f'</div>'
f'<textarea class="rating-note" placeholder="Poznámka..." '
f'style="width:100%;margin-top:6px;padding:4px;border:1px solid #ddd;'
f'border-radius:4px;font-size:12px;resize:vertical;min-height:32px;'
f'display:none;font-family:system-ui,sans-serif;"></textarea>'
f'</div></div>'
)
# Escape for JS
popup = popup.replace("'", "\\'").replace("\n", "")
is_fav = source in ("psn", "cityhome")
if is_fav:
marker_fn = "addHeartMarker"
elif is_new:
marker_fn = "addNewMarker"
else:
marker_fn = "addMarker"
markers_js += (
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}');\n"
)
# Build legend — price per m² bands + disposition counts
legend_items = price_legend_items
# Disposition counts below the color legend
disp_counts = {}
for e in estates:
d = e["disposition"]
disp_counts[d] = disp_counts.get(d, 0) + 1
disp_order = ["3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+"]
disp_summary = ", ".join(
f"{d} ({disp_counts[d]})" for d in disp_order if d in disp_counts
)
legend_items += (
f'<div style="margin-top:8px;padding-top:6px;border-top:1px solid #eee;'
f'font-size:12px;color:#666;">{disp_summary}</div>'
)
# Heart marker legend for PSN/CityHome
fav_count = sum(1 for e in estates if e.get("source") in ("psn", "cityhome"))
if fav_count > 0:
legend_items += (
f'<div style="display:flex;align-items:center;gap:6px;margin:8px 0 3px 0;'
f'padding-top:6px;border-top:1px solid #eee;">'
f'<svg width="14" height="14" viewBox="0 0 24 24">'
f'<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
f'C2 5.42 4.42 3 7.5 3c1.74 0 3.41.81 4.5 2.09C13.09 3.81 '
f'14.76 3 16.5 3 19.58 3 22 5.42 22 8.5c0 3.78-3.4 6.86-8.55 '
f'11.54L12 21.35z" fill="#D32F2F"/></svg>'
f'<span>PSN / CityHome ({fav_count})</span></div>'
)
# Price stats
prices = [e["price"] for e in estates if e["price"] > 0]
min_price = format_price(min(prices)) if prices else "N/A"
max_price = format_price(max(prices)) if prices else "N/A"
avg_price = format_price(int(sum(prices) / len(prices))) if prices else "N/A"
html = f"""<!DOCTYPE html>
<html lang="cs">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Byty v Praze — mapa ({len(estates)} bytů)</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{ font-family: system-ui, -apple-system, sans-serif; }}
#map {{ width: 100%; height: 100vh; }}
.heart-icon {{ background: none !important; border: none !important; }}
.star-icon {{ background: none !important; border: none !important; }}
.rate-btn:hover {{ background: #f0f0f0 !important; }}
.rate-btn.active-fav {{ background: #FFF9C4 !important; border-color: #FFC107 !important; }}
.rate-btn.active-rej {{ background: #FFEBEE !important; border-color: #F44336 !important; }}
@keyframes pulse-glow {{
0% {{ box-shadow: 0 0 4px 2px rgba(255, 193, 7, 0.4); }}
50% {{ box-shadow: 0 0 10px 5px rgba(255, 193, 7, 0.7); }}
100% {{ box-shadow: 0 0 4px 2px rgba(255, 193, 7, 0.4); }}
}}
.marker-favorite {{ animation: pulse-glow 2s ease-in-out infinite; border-radius: 50%; }}
.heart-icon-fav svg path {{ stroke: gold !important; stroke-width: 2.5 !important; filter: drop-shadow(0 0 4px rgba(255,193,7,0.7)); }}
.heart-icon-rej {{ opacity: 0.4 !important; filter: grayscale(1); }}
.reject-overlay {{ background: none !important; border: none !important; pointer-events: none !important; }}
@keyframes pulse-new {{
0% {{ stroke-opacity: 1; stroke-width: 3px; r: 11; }}
50% {{ stroke-opacity: 0.4; stroke-width: 6px; r: 12; }}
100% {{ stroke-opacity: 1; stroke-width: 3px; r: 11; }}
}}
.marker-new {{ animation: pulse-new 2s ease-in-out infinite; }}
.info-panel {{
position: absolute; top: 10px; right: 10px; z-index: 1000;
background: white; padding: 16px; border-radius: 10px;
box-shadow: 0 2px 12px rgba(0,0,0,0.15); max-width: 260px;
font-size: 13px; line-height: 1.5;
transition: transform 0.3s ease, opacity 0.3s ease;
}}
.info-panel.collapsed {{
transform: translateX(calc(100% + 20px));
opacity: 0; pointer-events: none;
}}
.panel-open-btn {{
position: absolute; top: 10px; right: 10px; z-index: 1001;
width: 40px; height: 40px; border-radius: 8px;
background: white; border: none; cursor: pointer;
box-shadow: 0 2px 12px rgba(0,0,0,0.15);
font-size: 20px; display: flex; align-items: center; justify-content: center;
transition: opacity 0.3s ease;
}}
.panel-open-btn.hidden {{ opacity: 0; pointer-events: none; }}
.panel-close-btn {{
position: absolute; top: 8px; right: 8px;
width: 28px; height: 28px; border-radius: 6px;
background: none; border: 1px solid #ddd; cursor: pointer;
font-size: 16px; display: flex; align-items: center; justify-content: center;
color: #888;
}}
.panel-close-btn:hover {{ background: #f0f0f0; color: #333; }}
.info-panel h2 {{ font-size: 16px; margin-bottom: 8px; }}
.info-panel .stats {{ color: #666; margin-bottom: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee; }}
.filter-section {{ margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; }}
.filter-section label {{ display: flex; align-items: center; gap: 6px; margin: 3px 0; cursor: pointer; }}
.filter-section input[type="checkbox"] {{ accent-color: #1976D2; }}
#floor-filter {{ margin-top: 8px; }}
#floor-filter select {{ width: 100%; padding: 4px; border-radius: 4px; border: 1px solid #ccc; }}
.status-link {{ display: block; margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; text-align: center; }}
.status-link a {{ color: #1976D2; text-decoration: none; font-size: 12px; }}
@media (max-width: 600px) {{
.info-panel {{ max-width: calc(100vw - 60px); right: 10px; }}
.info-panel.collapsed {{ transform: translateX(calc(100% + 20px)); }}
.panel-close-btn {{ top: 6px; right: 6px; }}
}}
</style>
</head>
<body>
<div id="map"></div>
<button class="panel-open-btn hidden" id="panel-open-btn" onclick="togglePanel()">☰</button>
<div class="info-panel" id="info-panel">
<button class="panel-close-btn" id="panel-close-btn" onclick="togglePanel()">✕</button>
<h2>Byty v Praze</h2>
<div class="stats">
<div>Celkem: <b id="visible-count">{len(estates)}</b> bytů</div>
<div>Cena: {min_price}{max_price}</div>
<div>Průměr: {avg_price}</div>
</div>
{legend_items}
<div class="filter-section">
<b>Filtry:</b>
<div id="floor-filter">
<label>Patro od:
<select id="min-floor" onchange="applyFilters()">
<option value="2">2. NP (vše)</option>
<option value="3">3. NP+</option>
<option value="4">4. NP+</option>
<option value="5">5. NP+</option>
</select>
</label>
</div>
<div style="margin-top:6px;">
<label>Max cena:
<select id="max-price" onchange="applyFilters()">
<option value="13500000">13 500 000 Kč</option>
<option value="12000000">12 000 000 Kč</option>
<option value="10000000">10 000 000 Kč</option>
<option value="8000000">8 000 000 Kč</option>
</select>
</label>
</div>
</div>
<div class="filter-section">
<div id="rating-counts" style="margin-bottom:6px;font-size:12px;color:#666;">
⭐ 0 oblíbených, 🚫 0 zamítnutých
</div>
<label>
<input type="checkbox" id="hide-rejected" onchange="applyFilters()">
Skrýt zamítnuté
</label>
</div>
<div class="status-link"><a href="status.html">Scraper status</a></div>
</div>
<script>
// Prague center
var map = L.map('map').setView([50.075, 14.437], 12);
L.tileLayer('https://{{s}}.basemaps.cartocdn.com/light_nolabels/{{z}}/{{x}}/{{y}}{{r}}.png', {{
attribution: '&copy; OpenStreetMap contributors &copy; CARTO',
maxZoom: 19,
subdomains: 'abcd',
}}).addTo(map);
// Labels as separate layer on top (so markers sit between background and labels)
L.tileLayer('https://{{s}}.basemaps.cartocdn.com/light_only_labels/{{z}}/{{x}}/{{y}}{{r}}.png', {{
maxZoom: 19,
subdomains: 'abcd',
pane: 'shadowPane',
}}).addTo(map);
var allMarkers = [];
function addMarker(lat, lon, color, popup, hashId) {{
var marker = L.circleMarker([lat, lon], {{
radius: 8,
fillColor: color,
color: '#fff',
weight: 2,
opacity: 1,
fillOpacity: 0.85,
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId }};
allMarkers.push(marker);
marker.addTo(map);
}}
function addNewMarker(lat, lon, color, popup, hashId) {{
var marker = L.circleMarker([lat, lon], {{
radius: 12,
fillColor: color,
color: color,
weight: 4,
opacity: 0.35,
fillOpacity: 0.95,
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isNew: true }};
allMarkers.push(marker);
marker.addTo(map);
marker.on('add', function() {{
if (marker._path) marker._path.classList.add('marker-new');
}});
}}
function heartIcon(color) {{
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">'
+ '<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
+ 'C2 5.42 4.42 3 7.5 3c1.74 0 3.41.81 4.5 2.09C13.09 3.81 '
+ '14.76 3 16.5 3 19.58 3 22 5.42 22 8.5c0 3.78-3.4 6.86-8.55 '
+ '11.54L12 21.35z" fill="' + color + '" stroke="white" stroke-width="1.5"/></svg>';
return L.divIcon({{
html: svg,
className: 'heart-icon',
iconSize: [24, 24],
iconAnchor: [12, 22],
popupAnchor: [0, -18],
}});
}}
function starIcon() {{
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" viewBox="0 0 24 24">'
+ '<path d="M12 2l3.09 6.26L22 9.27l-5 4.87L18.18 22 12 18.27 '
+ '5.82 22 7 14.14 2 9.27l6.91-1.01L12 2z" '
+ 'fill="#FFC107" stroke="#F57F17" stroke-width="1" '
+ 'filter="drop-shadow(0 1px 3px rgba(0,0,0,0.3))"/></svg>';
return L.divIcon({{
html: svg,
className: 'star-icon',
iconSize: [28, 28],
iconAnchor: [14, 14],
popupAnchor: [0, -14],
}});
}}
function addHeartMarker(lat, lon, color, popup, hashId) {{
var marker = L.marker([lat, lon], {{
icon: heartIcon(color),
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true }};
allMarkers.push(marker);
marker.addTo(map);
}}
{markers_js}
// ── Rating system ──────────────────────────────────────────────
var RATINGS_KEY = 'byty_ratings';
function loadRatings() {{
try {{
var data = localStorage.getItem(RATINGS_KEY);
return data ? JSON.parse(data) : {{}};
}} catch(e) {{ return {{}}; }}
}}
function saveRatings(ratings) {{
localStorage.setItem(RATINGS_KEY, JSON.stringify(ratings));
}}
function addRejectStrike(marker) {{
removeRejectStrike(marker);
var color = marker._data.color || '#999';
// SVG "no entry" icon — circle with diagonal line, colored to match marker
var svg = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20">'
+ '<circle cx="12" cy="12" r="10" fill="none" stroke="' + color + '" stroke-width="2.5" opacity="0.85"/>'
+ '<line x1="5.5" y1="5.5" x2="18.5" y2="18.5" stroke="' + color + '" stroke-width="2.5" stroke-linecap="round" opacity="0.85"/>'
+ '</svg>';
var icon = L.divIcon({{
className: 'reject-overlay',
html: svg,
iconSize: [20, 20],
iconAnchor: [10, 10],
}});
var m = L.marker([marker._data.lat, marker._data.lon], {{
icon: icon,
interactive: false,
pane: 'markerPane',
}});
m.addTo(map);
marker._rejectStrike = m;
}}
function removeRejectStrike(marker) {{
if (marker._rejectStrike) {{
map.removeLayer(marker._rejectStrike);
marker._rejectStrike = null;
}}
}}
function applyMarkerStyle(marker, status) {{
if (marker._data.isHeart) {{
var el = marker._icon;
if (!el) return;
el.classList.remove('heart-icon-fav', 'heart-icon-rej');
el.style.transform = el.style.transform.replace(/scale\\([^)]*\\)/, '');
if (status === 'fav') {{
el.classList.add('heart-icon-fav');
el.style.transform += ' scale(1.3)';
}} else if (status === 'reject') {{
el.classList.add('heart-icon-rej');
}}
}} else {{
if (status === 'fav') {{
removeRejectStrike(marker);
if (!marker._data._origCircle) marker._data._origCircle = true;
var popup = marker.getPopup();
var popupContent = popup ? popup.getContent() : '';
var wasOnMap = map.hasLayer(marker);
if (wasOnMap) map.removeLayer(marker);
var starMarker = L.marker([marker._data.lat, marker._data.lon], {{
icon: starIcon(),
}}).bindPopup(popupContent);
starMarker._data = marker._data;
var idx = allMarkers.indexOf(marker);
if (idx !== -1) allMarkers[idx] = starMarker;
if (wasOnMap) starMarker.addTo(map);
}} else if (status === 'reject') {{
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
revertToCircle(marker, {{ radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1 }});
}} else {{
marker.setStyle({{
radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1,
}});
if (marker._path) marker._path.classList.remove('marker-favorite');
}}
// Add strikethrough line over the marker
addRejectStrike(marker);
}} else {{
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
revertToCircle(marker, {{ radius: 8, fillColor: marker._data.color, color: '#fff', weight: 2, fillOpacity: 0.85 }});
}} else {{
marker.setStyle({{
radius: 8, fillColor: marker._data.color, color: '#fff',
weight: 2, fillOpacity: 0.85,
}});
if (marker._path) marker._path.classList.remove('marker-favorite');
}}
if (marker._path) marker._path.classList.remove('marker-rejected');
removeRejectStrike(marker);
}}
}}
}}
function revertToCircle(marker, style) {{
var popup = marker.getPopup();
var popupContent = popup ? popup.getContent() : '';
var wasOnMap = map.hasLayer(marker);
if (wasOnMap) map.removeLayer(marker);
var cm = L.circleMarker([marker._data.lat, marker._data.lon], style).bindPopup(popupContent);
cm._data = marker._data;
delete cm._data._starRef;
var idx = allMarkers.indexOf(marker);
if (idx !== -1) allMarkers[idx] = cm;
if (wasOnMap) cm.addTo(map);
}}
function rateMarker(marker, action) {{
var hashId = marker._data.hashId;
var ratings = loadRatings();
var current = ratings[hashId];
if (current && current.status === action) {{
delete ratings[hashId];
saveRatings(ratings);
applyMarkerStyle(marker, null);
updateRatingCounts();
applyFilters();
return null;
}} else {{
var note = (current && current.note) || '';
ratings[hashId] = {{ status: action, note: note }};
saveRatings(ratings);
applyMarkerStyle(marker, action);
updateRatingCounts();
applyFilters();
return action;
}}
}}
function setNote(hashId, note) {{
var ratings = loadRatings();
if (ratings[hashId]) {{
ratings[hashId].note = note;
saveRatings(ratings);
}}
}}
function updateRatingCounts() {{
var ratings = loadRatings();
var favCount = 0, rejCount = 0;
for (var id in ratings) {{
if (ratings[id].status === 'fav') favCount++;
if (ratings[id].status === 'reject') rejCount++;
}}
var el = document.getElementById('rating-counts');
if (el) el.innerHTML = '' + favCount + ' oblíbených, 🚫 ' + rejCount + ' zamítnutých';
}}
function restoreRatings() {{
var ratings = loadRatings();
allMarkers.forEach(function(m) {{
var r = ratings[m._data.hashId];
if (r) applyMarkerStyle(m, r.status);
}});
updateRatingCounts();
}}
// ── Popup rating handler ───────────────────────────────────────
map.on('popupopen', function(e) {{
var container = e.popup.getElement();
if (!container) return;
var wrapper = container.querySelector('[data-hashid]');
if (!wrapper) return;
var hashId = wrapper.getAttribute('data-hashid');
var marker = null;
for (var i = 0; i < allMarkers.length; i++) {{
if (String(allMarkers[i]._data.hashId) === String(hashId)) {{
marker = allMarkers[i]; break;
}}
}}
if (!marker) return;
var ratings = loadRatings();
var current = ratings[hashId];
var favBtn = container.querySelector('.fav-btn');
var rejBtn = container.querySelector('.rej-btn');
var statusEl = container.querySelector('.rating-status');
var noteEl = container.querySelector('.rating-note');
// Restore state in popup
favBtn.classList.remove('active-fav');
rejBtn.classList.remove('active-rej');
statusEl.textContent = '';
noteEl.style.display = 'none';
noteEl.value = '';
if (current) {{
if (current.status === 'fav') {{
favBtn.classList.add('active-fav');
statusEl.textContent = 'Oblíbený';
statusEl.style.color = '#F9A825';
}} else if (current.status === 'reject') {{
rejBtn.classList.add('active-rej');
statusEl.textContent = 'Zamítnutý';
statusEl.style.color = '#E53935';
}}
noteEl.style.display = 'block';
noteEl.value = current.note || '';
}}
favBtn.onclick = function() {{
var result = rateMarker(marker, 'fav');
favBtn.classList.remove('active-fav');
rejBtn.classList.remove('active-rej');
if (result === 'fav') {{
favBtn.classList.add('active-fav');
statusEl.textContent = 'Oblíbený';
statusEl.style.color = '#F9A825';
noteEl.style.display = 'block';
}} else {{
statusEl.textContent = '';
noteEl.style.display = 'none';
noteEl.value = '';
}}
}};
rejBtn.onclick = function() {{
var result = rateMarker(marker, 'reject');
favBtn.classList.remove('active-fav');
rejBtn.classList.remove('active-rej');
if (result === 'reject') {{
rejBtn.classList.add('active-rej');
statusEl.textContent = 'Zamítnutý';
statusEl.style.color = '#E53935';
noteEl.style.display = 'block';
}} else {{
statusEl.textContent = '';
noteEl.style.display = 'none';
noteEl.value = '';
}}
}};
var noteTimer = null;
noteEl.oninput = function() {{
clearTimeout(noteTimer);
noteTimer = setTimeout(function() {{
setNote(hashId, noteEl.value);
}}, 500);
}};
}});
// ── Filters ────────────────────────────────────────────────────
function applyFilters() {{
var minFloor = parseInt(document.getElementById('min-floor').value);
var maxPrice = parseInt(document.getElementById('max-price').value);
var hideRejected = document.getElementById('hide-rejected').checked;
var ratings = loadRatings();
var visible = 0;
allMarkers.forEach(function(m) {{
var popup = m.getPopup().getContent();
var floorMatch = popup.match(/(\\d+)\\. NP/);
var priceMatch = popup.match(/([\\d\\s]+)\\sKč/);
var floor = floorMatch ? parseInt(floorMatch[1]) : null;
var price = priceMatch ? parseInt(priceMatch[1].replace(/\\s/g, '')) : 0;
var show = true;
if (floor !== null && floor < minFloor) show = false;
if (price > maxPrice) show = false;
var r = ratings[m._data.hashId];
if (hideRejected && r && r.status === 'reject') show = false;
if (show) {{
if (!map.hasLayer(m)) m.addTo(map);
visible++;
// Show strike line if rejected and visible
if (m._rejectStrike && !map.hasLayer(m._rejectStrike)) m._rejectStrike.addTo(map);
}} else {{
if (map.hasLayer(m)) map.removeLayer(m);
// Hide strike line when marker hidden
if (m._rejectStrike && map.hasLayer(m._rejectStrike)) map.removeLayer(m._rejectStrike);
}}
}});
// Re-apply styles for markers that were just added to map
allMarkers.forEach(function(m) {{
if (map.hasLayer(m)) {{
var r = ratings[m._data.hashId];
if (r) applyMarkerStyle(m, r.status);
}}
}});
document.getElementById('visible-count').textContent = visible;
}}
// Initialize ratings on load
restoreRatings();
// ── Panel toggle ──────────────────────────────────────────────
function togglePanel() {{
var panel = document.getElementById('info-panel');
var openBtn = document.getElementById('panel-open-btn');
var isOpen = !panel.classList.contains('collapsed');
if (isOpen) {{
panel.classList.add('collapsed');
openBtn.classList.remove('hidden');
}} else {{
panel.classList.remove('collapsed');
openBtn.classList.add('hidden');
}}
}}
// On mobile, start with panel collapsed
if (window.innerWidth <= 600) {{
document.getElementById('info-panel').classList.add('collapsed');
document.getElementById('panel-open-btn').classList.remove('hidden');
}}
</script>
</body>
</html>"""
path = Path(output_path)
path.write_text(html, encoding="utf-8")
logger.info(f"\n✓ Mapa uložena: {path.resolve()}")
return str(path.resolve())
# ── Main ─────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Command-line entry point: scrape listings, back the raw data up as JSON,
    # then render the interactive HTML map and report where it was written.
    parser = argparse.ArgumentParser(description="Scrape apartments from Sreality.cz")
    parser.add_argument("--max-pages", type=int, help="Maximum number of pages to scrape")
    parser.add_argument("--max-properties", type=int, help="Maximum number of properties to fetch details for")
    parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        help="Logging level (default: INFO)")
    args = parser.parse_args()

    # Configure logging; the level name is restricted by `choices` above, so
    # the getattr lookup on the logging module always resolves.
    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
        handlers=[logging.StreamHandler()],
    )

    # Use a monotonic clock for the duration: unlike time.time(), it cannot
    # jump if the system clock is adjusted while the scrape is running.
    start = time.monotonic()
    estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)

    if estates:
        # Save raw data as JSON backup
        json_path = Path("byty_sreality.json")
        json_path.write_text(
            json.dumps(estates, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        logger.info(f"✓ Data uložena: {json_path.resolve()}")

        # Generate map
        map_path = generate_map(estates)

        elapsed = time.monotonic() - start
        logger.info(f"\n⏱ Celkový čas: {elapsed:.0f} s")

        # generate_map returns the resolved (absolute) path as a string.
        # Path.as_uri() turns it into a well-formed file URI: correct slashes
        # on Windows and percent-encoding for spaces / non-ASCII characters,
        # which plain "file://" + path concatenation gets wrong.
        map_uri = Path(map_path).as_uri()
        logger.info(f"\nOtevři v prohlížeči:\n {map_uri}")
    else:
        logger.info("\nŽádné byty neodpovídají kritériím :(")