2 Commits
main ... 0.01

Author SHA1 Message Date
Jan Novak
a1212c6312 Tag Docker images with both git tag and latest
All checks were successful
Build and Push / build (push) Successful in 8s
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 21:05:51 +01:00
Jan Novak
0b95c847c4 Add first_seen/last_updated timestamps to track property freshness
Each property record now carries two date fields:
- first_seen: date the listing first appeared (preserved across runs)
- last_updated: date of the most recent scrape that included it

All 6 scrapers (Sreality, Realingo, Bezrealitky, iDNES, PSN, CityHome)
set these fields during scraping. Cached results preserve first_seen and
refresh last_updated. PSN and CityHome gain a load_previous() helper to
track first_seen across runs (they lacked caching before).

The merge script keeps the earliest first_seen and latest last_updated
when deduplicating listings across sources.

The HTML map now shows dates in popups ("Přidáno: DD.MM.YYYY"), displays
a green "NOVÉ" badge on newly discovered listings, and adds a "Přidáno"
dropdown filter (24h / 3 days / 7 days / 14 days) for spotting new ones.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 21:03:08 +01:00
16 changed files with 1927 additions and 13358 deletions

View File

@@ -30,6 +30,7 @@ jobs:
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
TAG=${{ inputs.tag }} TAG=${{ inputs.tag }}
fi fi
IMAGE=gitea.home.hrajfrisbee.cz/${{ github.repository }}:$TAG REPO=gitea.home.hrajfrisbee.cz/${{ github.repository }}
docker build -f build/Dockerfile -t $IMAGE . docker build -f build/Dockerfile -t $REPO:$TAG -t $REPO:latest .
docker push $IMAGE docker push $REPO:$TAG
docker push $REPO:latest

View File

@@ -10,7 +10,7 @@ WORKDIR /app
COPY scrape_and_map.py scrape_realingo.py scrape_bezrealitky.py \ COPY scrape_and_map.py scrape_realingo.py scrape_bezrealitky.py \
scrape_idnes.py scrape_psn.py scrape_cityhome.py \ scrape_idnes.py scrape_psn.py scrape_cityhome.py \
merge_and_map.py regen_map.py run_all.sh ratings_server.py ./ merge_and_map.py regen_map.py run_all.sh ./
COPY build/crontab /etc/crontabs/root COPY build/crontab /etc/crontabs/root
COPY build/entrypoint.sh /entrypoint.sh COPY build/entrypoint.sh /entrypoint.sh
@@ -18,7 +18,7 @@ RUN chmod +x /entrypoint.sh run_all.sh
RUN mkdir -p /app/data RUN mkdir -p /app/data
EXPOSE 8080 8081 EXPOSE 8080
HEALTHCHECK --interval=60s --timeout=5s --start-period=300s \ HEALTHCHECK --interval=60s --timeout=5s --start-period=300s \
CMD wget -q -O /dev/null http://localhost:8080/ || exit 1 CMD wget -q -O /dev/null http://localhost:8080/ || exit 1

View File

@@ -6,7 +6,7 @@ DATA_DIR="/app/data"
# Create symlinks so scripts (which write to /app/) persist data to the volume # Create symlinks so scripts (which write to /app/) persist data to the volume
for f in byty_sreality.json byty_realingo.json byty_bezrealitky.json \ for f in byty_sreality.json byty_realingo.json byty_bezrealitky.json \
byty_idnes.json byty_psn.json byty_cityhome.json byty_merged.json \ byty_idnes.json byty_psn.json byty_cityhome.json byty_merged.json \
mapa_bytu.html ratings.json; do mapa_bytu.html; do
# Remove real file if it exists (e.g. baked into image) # Remove real file if it exists (e.g. baked into image)
[ -f "/app/$f" ] && [ ! -L "/app/$f" ] && rm -f "/app/$f" [ -f "/app/$f" ] && [ ! -L "/app/$f" ] && rm -f "/app/$f"
ln -sf "$DATA_DIR/$f" "/app/$f" ln -sf "$DATA_DIR/$f" "/app/$f"
@@ -18,8 +18,5 @@ crond -b -l 2
echo "[entrypoint] Starting initial scrape in background..." echo "[entrypoint] Starting initial scrape in background..."
bash /app/run_all.sh & bash /app/run_all.sh &
echo "[entrypoint] Starting ratings API server on port 8081..."
DATA_DIR="$DATA_DIR" python3 /app/ratings_server.py &
echo "[entrypoint] Starting HTTP server on port 8080..." echo "[entrypoint] Starting HTTP server on port 8080..."
exec python3 -m http.server 8080 --directory "$DATA_DIR" exec python3 -m http.server 8080 --directory "$DATA_DIR"

File diff suppressed because it is too large Load Diff

View File

@@ -1,202 +0,0 @@
#!/usr/bin/env python3
"""Generate status.json from scraper JSON outputs and run log."""
from __future__ import annotations
import json
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional
# Directory containing this script; all scraper JSON outputs live beside it.
HERE = Path(__file__).parent

# Human-readable source name -> JSON file written by that source's scraper.
SOURCE_FILES = {
    "Sreality": "byty_sreality.json",
    "Realingo": "byty_realingo.json",
    "Bezrealitky": "byty_bezrealitky.json",
    "iDNES": "byty_idnes.json",
    "PSN": "byty_psn.json",
    "CityHome": "byty_cityhome.json",
}

# Deduplicated listing file produced by the merge step.
MERGED_FILE = "byty_merged.json"
def count_source(path: Path) -> dict:
    """Summarize one scraper output file.

    Returns ``{"accepted": N, "updated_at": ISO-mtime}`` on success, or
    ``{"accepted": 0, "error": reason}`` when the file is missing or
    cannot be read/parsed.
    """
    if not path.exists():
        return {"accepted": 0, "error": "soubor nenalezen"}
    try:
        listings = json.loads(path.read_text(encoding="utf-8"))
        stamp = datetime.fromtimestamp(path.stat().st_mtime).isoformat(
            timespec="seconds"
        )
    except Exception as e:  # malformed JSON, permission error, ...
        return {"accepted": 0, "error": str(e)}
    return {"accepted": len(listings), "updated_at": stamp}
def parse_log(log_path: str) -> dict[str, dict]:
    """Parse scraper run log and extract per-source statistics.

    Scrapers log summary lines like:
        ✓ Vyhovující byty: 12
        Vyloučeno (prodáno): 5
        Staženo stránek: 3
        Staženo inzerátů: 48
        Celkem bytů v cache: 120
    and section headers like:
        [2/6] Realingo

    Returns a mapping of source name (as in SOURCE_FILES) to a stats dict;
    returns {} when the log is absent or contains no recognizable sections.
    """
    if not log_path or not os.path.exists(log_path):
        return {}
    with open(log_path, encoding="utf-8") as f:
        content = f.read()
    # Split into per-source sections by the [N/6] step header.
    # Each section header looks like "[2/6] Realingo\n----..." — the dashes
    # underline requires at least one '-' on the following line.
    section_pattern = re.compile(r'\[(\d+)/\d+\]\s+(.+)\n-+', re.MULTILINE)
    sections_found = list(section_pattern.finditer(content))
    if not sections_found:
        return {}
    stats = {}
    for i, match in enumerate(sections_found):
        step_name = match.group(2).strip()
        # A section's text runs from the end of its header to the start of
        # the next header (or end of file for the last section).
        start = match.end()
        end = sections_found[i + 1].start() if i + 1 < len(sections_found) else len(content)
        section_text = content[start:end]
        # Identify which sources this section covers; a combined step such
        # as "PSN + CityHome" matches (and therefore covers) both names.
        source_names = []
        for name in SOURCE_FILES:
            if name.lower() in step_name.lower():
                source_names.append(name)
        if not source_names:
            continue

        # Closure over the current section_text; called immediately below,
        # so the late-binding of section_text is not an issue here.
        def extract(pattern: str) -> Optional[int]:
            m = re.search(pattern, section_text)
            return int(m.group(1)) if m else None

        # Summary lines present in all/most scrapers (None when absent).
        accepted = extract(r'Vyhovující byty[:\s]+(\d+)')
        fetched = extract(r'Staženo inzerátů[:\s]+(\d+)')
        pages = extract(r'Staženo stránek[:\s]+(\d+)')
        cached = extract(r'Celkem bytů v cache[:\s]+(\d+)')
        cache_hits = extract(r'Cache hit[:\s]+(\d+)')
        # Rejection reasons — collect all "Vyloučeno (<reason>): N" lines
        # into a reason -> count dict.
        excluded = {}
        for m in re.finditer(r'Vyloučeno\s+\(([^)]+)\)[:\s]+(\d+)', section_text):
            excluded[m.group(1)] = int(m.group(2))
        # Fall back to a bare "Vyloučeno: N" style total when no per-reason
        # lines were found (e.g. PSN-style output).
        total_excluded = sum(excluded.values()) if excluded else extract(r'Vyloučen\w*[:\s]+(\d+)')
        # Build the entry with only the keys that were actually found.
        entry = {}
        if accepted is not None:
            entry["accepted"] = accepted
        if fetched is not None:
            entry["fetched"] = fetched
        if pages is not None:
            entry["pages"] = pages
        if cached is not None:
            entry["cached"] = cached
        if cache_hits is not None:
            entry["cache_hits"] = cache_hits
        if excluded:
            entry["excluded"] = excluded
        elif total_excluded is not None:
            entry["excluded_total"] = total_excluded
        # NOTE: for a combined section ("PSN + CityHome") every covered
        # source maps to the SAME entry dict object.
        for name in source_names:
            stats[name] = entry
    return stats
def main():
    """Collect per-source stats, write status.json, and print a summary.

    Optional positional CLI arguments:
      1. start_time   ISO timestamp of when the scrape run started
      2. duration_sec total run duration in seconds (must parse as int)
      3. log_path     combined scraper log to mine for extra statistics
    """
    start_time = None
    duration_sec = None
    if len(sys.argv) >= 3:
        start_time = sys.argv[1]
        try:
            duration_sec = int(sys.argv[2])
        except ValueError:
            # Non-numeric duration: keep None rather than abort.
            pass
    if not start_time:
        start_time = datetime.now().isoformat(timespec="seconds")
    log_path = sys.argv[3] if len(sys.argv) >= 4 else None
    log_stats = parse_log(log_path)
    sources = []
    for name, filename in SOURCE_FILES.items():
        path = HERE / filename
        info = count_source(path)
        info["name"] = name
        # Merge per-source stats parsed from the run log, when present.
        ls = log_stats.get(name, {})
        for k in ("fetched", "pages", "cached", "cache_hits", "excluded", "excluded_total"):
            if k in ls:
                info[k] = ls[k]
        # Override accepted from log if available (log is authoritative for latest run)
        if "accepted" in ls:
            info["accepted"] = ls["accepted"]
        sources.append(info)
    # Total accepted across all sources, before deduplication.
    total_accepted = sum(s.get("accepted", 0) for s in sources)
    # Merged / deduplicated count from the merge step's output file.
    merged_path = HERE / MERGED_FILE
    deduplicated = 0
    if merged_path.exists():
        try:
            merged = json.loads(merged_path.read_text(encoding="utf-8"))
            deduplicated = len(merged)
        except Exception:
            # Unreadable merged file: report 0 rather than crash.
            pass
    duplicates_removed = total_accepted - deduplicated if deduplicated else 0
    status = {
        "status": "done",
        "timestamp": start_time,
        "duration_sec": duration_sec,
        "total_accepted": total_accepted,
        "deduplicated": deduplicated,
        "duplicates_removed": duplicates_removed,
        "sources": sources,
    }
    out = HERE / "status.json"
    out.write_text(json.dumps(status, ensure_ascii=False, indent=2), encoding="utf-8")
    # Console summary (Czech, matching the scrapers' output language).
    print(f"Status uložen: {out}")
    print(f"  Celkem bytů (před dedup): {total_accepted}")
    print(f"  Po deduplikaci: {deduplicated}")
    if duplicates_removed:
        print(f"  Odstraněno duplikátů: {duplicates_removed}")
    for s in sources:
        acc = s.get("accepted", 0)
        err = s.get("error", "")
        exc = s.get("excluded", {})
        exc_total = sum(exc.values()) if exc else s.get("excluded_total", 0)
        parts = [f"{s['name']:12s}: {acc} bytů"]
        if exc_total:
            parts.append(f"({exc_total} vyloučeno)")
        if err:
            parts.append(f"[CHYBA: {err}]")
        print("  " + " ".join(parts))


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

View File

@@ -79,6 +79,19 @@ def main():
if key in seen_keys: if key in seen_keys:
dupes += 1 dupes += 1
existing = seen_keys[key] existing = seen_keys[key]
# Merge timestamps: keep earliest first_seen, latest last_updated
e_first = e.get("first_seen", "")
ex_first = existing.get("first_seen", "")
if e_first and ex_first:
existing["first_seen"] = min(e_first, ex_first)
elif e_first:
existing["first_seen"] = e_first
e_updated = e.get("last_updated", "")
ex_updated = existing.get("last_updated", "")
if e_updated and ex_updated:
existing["last_updated"] = max(e_updated, ex_updated)
elif e_updated:
existing["last_updated"] = e_updated
# Log it # Log it
print(f" Duplikát: {e['locality']} | {format_price(e['price'])} | {e.get('area', '?')}" print(f" Duplikát: {e['locality']} | {format_price(e['price'])} | {e.get('area', '?')}"
f"({e.get('source', '?')} vs {existing.get('source', '?')})") f"({e.get('source', '?')} vs {existing.get('source', '?')})")

View File

@@ -1,116 +0,0 @@
#!/usr/bin/env python3
"""
Minimal HTTP API server for persisting apartment ratings.
GET /api/ratings → returns ratings.json contents
POST /api/ratings → saves entire ratings object
GET /api/ratings/export → same as GET, but with download header
Ratings file: /app/data/ratings.json (or ./ratings.json locally)
"""
import json
import logging
import os
import sys
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
# Port the ratings API listens on (overridable via env for local runs).
PORT = int(os.environ.get("RATINGS_PORT", 8081))
# Where ratings.json lives: /app/data in the container, cwd locally.
DATA_DIR = Path(os.environ.get("DATA_DIR", "."))
RATINGS_FILE = DATA_DIR / "ratings.json"

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [ratings] %(levelname)s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
log = logging.getLogger(__name__)
def load_ratings() -> dict:
    """Read ratings from RATINGS_FILE; return {} when missing or unreadable."""
    try:
        if RATINGS_FILE.exists():
            return json.loads(RATINGS_FILE.read_text(encoding="utf-8"))
    except Exception as exc:
        # Corrupt or unreadable file: log and fall through to the empty default.
        log.error("Failed to load ratings: %s", exc)
    return {}
def save_ratings(data: dict) -> None:
    """Persist the full ratings object to RATINGS_FILE as pretty UTF-8 JSON."""
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    RATINGS_FILE.write_text(payload, encoding="utf-8")
class RatingsHandler(BaseHTTPRequestHandler):
    """HTTP handler for the ratings API.

    Routes:
      GET  /api/ratings         -> current ratings object as JSON
      GET  /api/ratings/export  -> same, with a download header
      POST /api/ratings         -> replace the entire ratings object

    All responses carry permissive CORS headers so the map page (served on
    a different port) can call the API from the browser.
    """

    def log_message(self, format, *args):
        # Suppress default HTTP access log (we use our own logger).
        pass

    def _send_json(self, status: int, body: dict, extra_headers=None):
        """Serialize *body* as JSON and send it with status + CORS headers.

        extra_headers: optional dict of additional header name -> value pairs
        (e.g. Content-Disposition for the export endpoint).
        """
        payload = json.dumps(body, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        if extra_headers:
            for k, v in extra_headers.items():
                self.send_header(k, v)
        self.end_headers()
        self.wfile.write(payload)

    def do_OPTIONS(self):
        """CORS preflight: 204 with the allow-* headers and no body."""
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def do_GET(self):
        if self.path in ("/api/ratings", "/api/ratings/export"):
            ratings = load_ratings()
            extra = None
            if self.path == "/api/ratings/export":
                extra = {"Content-Disposition": 'attachment; filename="ratings.json"'}
            # FIX: format string was "GET %s%d ratings", which fused the path
            # and the count into e.g. "GET /api/ratings123 ratings"; add the
            # separator to match the POST log line style.
            log.info("GET %s → %d ratings", self.path, len(ratings))
            self._send_json(200, ratings, extra)
        else:
            self._send_json(404, {"error": "not found"})

    def do_POST(self):
        if self.path == "/api/ratings":
            length = int(self.headers.get("Content-Length", 0))
            if length == 0:
                self._send_json(400, {"error": "empty body"})
                return
            try:
                raw = self.rfile.read(length)
                data = json.loads(raw.decode("utf-8"))
            except Exception as e:
                log.warning("Bad request body: %s", e)
                self._send_json(400, {"error": "invalid JSON"})
                return
            # The API replaces the whole object; only a JSON object is valid.
            if not isinstance(data, dict):
                self._send_json(400, {"error": "expected JSON object"})
                return
            save_ratings(data)
            log.info("POST /api/ratings → saved %d ratings", len(data))
            self._send_json(200, {"ok": True, "count": len(data)})
        else:
            self._send_json(404, {"error": "not found"})
if __name__ == "__main__":
log.info("Ratings server starting on port %d, data dir: %s", PORT, DATA_DIR)
log.info("Ratings file: %s", RATINGS_FILE)
server = HTTPServer(("0.0.0.0", PORT), RatingsHandler)
try:
server.serve_forever()
except KeyboardInterrupt:
log.info("Stopped.")
sys.exit(0)

View File

@@ -16,12 +16,6 @@ NC='\033[0m'
TOTAL=6 TOTAL=6
CURRENT=0 CURRENT=0
FAILED=0 FAILED=0
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
START_EPOCH=$(date +%s)
LOG_FILE="$(pwd)/scrape_run.log"
# Mark status as running
echo '{"status":"running"}' > status.json
show_help() { show_help() {
echo "Usage: ./run_all.sh [OPTIONS]" echo "Usage: ./run_all.sh [OPTIONS]"
@@ -69,8 +63,6 @@ step() {
} }
# ── Scrapery (paralelně kde to jde) ───────────────────────── # ── Scrapery (paralelně kde to jde) ─────────────────────────
# Tee all output to log file for status generation
exec > >(tee -a "$LOG_FILE") 2>&1
step "Sreality" step "Sreality"
python3 scrape_and_map.py $SCRAPER_ARGS || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); } python3 scrape_and_map.py $SCRAPER_ARGS || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); }
@@ -99,12 +91,6 @@ python3 merge_and_map.py || { echo -e "${RED}✗ Merge selhal${NC}"; FAILED=$((F
# ── Otevření mapy ──────────────────────────────────────────── # ── Otevření mapy ────────────────────────────────────────────
# ── Generování statusu ─────────────────────────────────────
END_EPOCH=$(date +%s)
DURATION=$((END_EPOCH - START_EPOCH))
python3 generate_status.py "$START_TIME" "$DURATION" "$LOG_FILE"
echo "" echo ""
echo "============================================================" echo "============================================================"
if [ $FAILED -eq 0 ]; then if [ $FAILED -eq 0 ]; then

View File

@@ -272,9 +272,13 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
# Check cache — if hash_id exists and price unchanged, reuse # Check cache — if hash_id exists and price unchanged, reuse
cached = cache.get(hash_id) cached = cache.get(hash_id)
today = datetime.now().strftime("%Y-%m-%d")
if cached and cached.get("price") == estate.get("price", 0): if cached and cached.get("price") == estate.get("price", 0):
cache_hits += 1 cache_hits += 1
logger.debug(f"Cache hit for hash_id={hash_id}") logger.debug(f"Cache hit for hash_id={hash_id}")
cached["last_updated"] = today
if "first_seen" not in cached:
cached["first_seen"] = today
results.append(cached) results.append(cached)
continue continue
@@ -332,6 +336,11 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
disp_cb = estate.get("_disposition_cb") or estate.get("seo", {}).get("category_sub_cb") disp_cb = estate.get("_disposition_cb") or estate.get("seo", {}).get("category_sub_cb")
seo = estate.get("seo", {}) seo = estate.get("seo", {})
# Preserve first_seen from cache if this is a price-changed re-fetch
first_seen = today
if cached and "first_seen" in cached:
first_seen = cached["first_seen"]
result = { result = {
"hash_id": hash_id, "hash_id": hash_id,
"name": estate.get("name", ""), "name": estate.get("name", ""),
@@ -347,7 +356,8 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
"ownership": ownership, "ownership": ownership,
"url": sreality_url(hash_id, seo), "url": sreality_url(hash_id, seo),
"image": (estate.get("_links", {}).get("images", [{}])[0].get("href", "") if estate.get("_links", {}).get("images") else ""), "image": (estate.get("_links", {}).get("images", [{}])[0].get("href", "") if estate.get("_links", {}).get("images") else ""),
"scraped_at": datetime.now().strftime("%Y-%m-%d"), "first_seen": first_seen,
"last_updated": today,
} }
results.append(result) results.append(result)
details_fetched += 1 details_fetched += 1
@@ -374,58 +384,26 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"): def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
"""Generate an interactive Leaflet.js HTML map.""" """Generate an interactive Leaflet.js HTML map."""
# Color by price per m² — cool blue→warm red scale, no yellow # Color by disposition
# Thresholds based on Prague market distribution (p25=120k, p50=144k, p75=162k) color_map = {
price_color_scale = [ "3+kk": "#2196F3", # blue
(110_000, "#1565C0"), # < 110k/m² → deep blue (levné) "3+1": "#4CAF50", # green
(130_000, "#42A5F5"), # 110130k → light blue "4+kk": "#FF9800", # orange
(150_000, "#66BB6A"), # 130150k → green (střed) "4+1": "#F44336", # red
(165_000, "#EF6C00"), # 150165k → dark orange "5+kk": "#9C27B0", # purple
(float("inf"), "#C62828"), # > 165k → dark red (drahé) "5+1": "#795548", # brown
] "6+": "#607D8B", # grey-blue
}
def price_color(estate: dict) -> str: def fmt_date(d):
price = estate.get("price") or 0 """Format ISO date (YYYY-MM-DD) to Czech format (DD.MM.YYYY)."""
area = estate.get("area") or 0 if d and len(d) == 10:
if not area: return f"{d[8:10]}.{d[5:7]}.{d[:4]}"
return "#9E9E9E" return ""
ppm2 = price / area
for threshold, color in price_color_scale:
if ppm2 < threshold:
return color
return "#E53935"
# Legend bands for info panel (built once)
price_legend_items = (
'<div style="margin-bottom:4px;font-size:12px;color:#555;font-weight:600;">Cena / m²:</div>'
)
bands = [
("#1565C0", "< 110 000 Kč/m²"),
("#42A5F5", "110 130 000 Kč/m²"),
("#66BB6A", "130 150 000 Kč/m²"),
("#EF6C00", "150 165 000 Kč/m²"),
("#C62828", "> 165 000 Kč/m²"),
("#9E9E9E", "cena/plocha neuvedena"),
]
for bcolor, blabel in bands:
price_legend_items += (
f'<div style="display:flex;align-items:center;gap:6px;margin:2px 0;">'
f'<span style="width:14px;height:14px;border-radius:50%;background:{bcolor};'
f'display:inline-block;border:2px solid white;box-shadow:0 1px 3px rgba(0,0,0,0.3);flex-shrink:0;"></span>'
f'<span>{blabel}</span></div>'
)
# New marker indicator — bigger dot, no extra border
price_legend_items += (
'<div style="display:flex;align-items:center;gap:6px;margin:6px 0 0 0;'
'padding-top:6px;border-top:1px solid #eee;">'
'<span style="width:18px;height:18px;border-radius:50%;background:#66BB6A;'
'display:inline-block;box-shadow:0 1px 4px rgba(0,0,0,0.35);flex-shrink:0;"></span>'
'<span>Nové (z dnešního scrapu) — větší</span></div>'
)
markers_js = "" markers_js = ""
for e in estates: for e in estates:
color = price_color(e) color = color_map.get(e["disposition"], "#999999")
floor_text = f'{e["floor"]}. NP' if e["floor"] else "neuvedeno" floor_text = f'{e["floor"]}. NP' if e["floor"] else "neuvedeno"
area_text = f'{e["area"]}' if e["area"] else "neuvedeno" area_text = f'{e["area"]}' if e["area"] else "neuvedeno"
building_text = e["building_type"] or "neuvedeno" building_text = e["building_type"] or "neuvedeno"
@@ -443,15 +421,31 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
source_color = source_colors.get(source, "#999") source_color = source_colors.get(source, "#999")
hash_id = e.get("hash_id", "") hash_id = e.get("hash_id", "")
first_seen = e.get("first_seen", "")
last_updated = e.get("last_updated", "")
scraped_at = e.get("scraped_at", "") first_seen_fmt = fmt_date(first_seen)
is_new = scraped_at == datetime.now().strftime("%Y-%m-%d") last_updated_fmt = fmt_date(last_updated)
# "NOVÉ" badge if first_seen equals latest scrape date
new_badge = ""
if first_seen and first_seen == last_updated:
new_badge = ( new_badge = (
'<span style="margin-left:6px;font-size:11px;background:#FFD600;color:#333;' '<span style="margin-left:6px;font-size:10px;background:#4CAF50;color:white;'
'padding:1px 6px;border-radius:3px;font-weight:bold;">NOVÉ</span>' 'padding:1px 5px;border-radius:3px;font-weight:bold;">NOVÉ</span>'
if is_new else ""
) )
# Date info line
date_line = ""
if first_seen_fmt:
date_line = (
f'<div style="margin-top:4px;font-size:11px;color:#888;">'
f'Přidáno: {first_seen_fmt}'
)
if last_updated_fmt and last_updated != first_seen:
date_line += f' · Aktualizace: {last_updated_fmt}'
date_line += '</div>'
popup = ( popup = (
f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}">' f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}">'
f'<b style="font-size:14px;">{format_price(e["price"])}</b>' f'<b style="font-size:14px;">{format_price(e["price"])}</b>'
@@ -461,7 +455,8 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
f'{floor_note}<br><br>' f'{floor_note}<br><br>'
f'<b>{e["locality"]}</b><br>' f'<b>{e["locality"]}</b><br>'
f'Stavba: {building_text}<br>' f'Stavba: {building_text}<br>'
f'Vlastnictví: {ownership_text}<br><br>' f'Vlastnictví: {ownership_text}'
f'{date_line}<br>'
f'<a href="{e["url"]}" target="_blank" ' f'<a href="{e["url"]}" target="_blank" '
f'style="color:{source_color};text-decoration:none;font-weight:bold;">' f'style="color:{source_color};text-decoration:none;font-weight:bold;">'
f'→ Otevřít na {source_label}</a>' f'→ Otevřít na {source_label}</a>'
@@ -485,32 +480,26 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
popup = popup.replace("'", "\\'").replace("\n", "") popup = popup.replace("'", "\\'").replace("\n", "")
is_fav = source in ("psn", "cityhome") is_fav = source in ("psn", "cityhome")
marker_fn = "addHeartMarker" if is_fav else "addMarker"
if is_fav:
marker_fn = "addHeartMarker"
elif is_new:
marker_fn = "addNewMarker"
else:
marker_fn = "addMarker"
markers_js += ( markers_js += (
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}');\n" f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}', '{first_seen}');\n"
) )
# Build legend — price per m² bands + disposition counts # Build legend
legend_items = price_legend_items legend_items = ""
# Disposition counts below the color legend
disp_counts = {} disp_counts = {}
for e in estates: for e in estates:
d = e["disposition"] d = e["disposition"]
disp_counts[d] = disp_counts.get(d, 0) + 1 disp_counts[d] = disp_counts.get(d, 0) + 1
disp_order = ["3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+"] for disp, color in color_map.items():
disp_summary = ", ".join( count = disp_counts.get(disp, 0)
f"{d} ({disp_counts[d]})" for d in disp_order if d in disp_counts if count > 0:
)
legend_items += ( legend_items += (
f'<div style="margin-top:8px;padding-top:6px;border-top:1px solid #eee;' f'<div style="display:flex;align-items:center;gap:6px;margin:3px 0;">'
f'font-size:12px;color:#666;">{disp_summary}</div>' f'<span style="width:14px;height:14px;border-radius:50%;'
f'background:{color};display:inline-block;border:2px solid white;'
f'box-shadow:0 1px 3px rgba(0,0,0,0.3);"></span>'
f'<span>{disp} ({count})</span></div>'
) )
# Heart marker legend for PSN/CityHome # Heart marker legend for PSN/CityHome
@@ -546,7 +535,6 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
body {{ font-family: system-ui, -apple-system, sans-serif; }} body {{ font-family: system-ui, -apple-system, sans-serif; }}
#map {{ width: 100%; height: 100vh; }} #map {{ width: 100%; height: 100vh; }}
.heart-icon {{ background: none !important; border: none !important; }} .heart-icon {{ background: none !important; border: none !important; }}
.star-icon {{ background: none !important; border: none !important; }}
.rate-btn:hover {{ background: #f0f0f0 !important; }} .rate-btn:hover {{ background: #f0f0f0 !important; }}
.rate-btn.active-fav {{ background: #FFF9C4 !important; border-color: #FFC107 !important; }} .rate-btn.active-fav {{ background: #FFF9C4 !important; border-color: #FFC107 !important; }}
.rate-btn.active-rej {{ background: #FFEBEE !important; border-color: #F44336 !important; }} .rate-btn.active-rej {{ background: #FFEBEE !important; border-color: #F44336 !important; }}
@@ -557,42 +545,13 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
}} }}
.marker-favorite {{ animation: pulse-glow 2s ease-in-out infinite; border-radius: 50%; }} .marker-favorite {{ animation: pulse-glow 2s ease-in-out infinite; border-radius: 50%; }}
.heart-icon-fav svg path {{ stroke: gold !important; stroke-width: 2.5 !important; filter: drop-shadow(0 0 4px rgba(255,193,7,0.7)); }} .heart-icon-fav svg path {{ stroke: gold !important; stroke-width: 2.5 !important; filter: drop-shadow(0 0 4px rgba(255,193,7,0.7)); }}
.heart-icon-rej {{ opacity: 0.4 !important; filter: grayscale(1); }} .heart-icon-rej {{ opacity: 0.2 !important; }}
.reject-overlay {{ background: none !important; border: none !important; pointer-events: none !important; }}
@keyframes pulse-new {{
0% {{ stroke-opacity: 1; stroke-width: 3px; r: 11; }}
50% {{ stroke-opacity: 0.4; stroke-width: 6px; r: 12; }}
100% {{ stroke-opacity: 1; stroke-width: 3px; r: 11; }}
}}
.marker-new {{ animation: pulse-new 2s ease-in-out infinite; }}
.info-panel {{ .info-panel {{
position: absolute; top: 10px; right: 10px; z-index: 1000; position: absolute; top: 10px; right: 10px; z-index: 1000;
background: white; padding: 16px; border-radius: 10px; background: white; padding: 16px; border-radius: 10px;
box-shadow: 0 2px 12px rgba(0,0,0,0.15); max-width: 260px; box-shadow: 0 2px 12px rgba(0,0,0,0.15); max-width: 260px;
font-size: 13px; line-height: 1.5; font-size: 13px; line-height: 1.5;
transition: transform 0.3s ease, opacity 0.3s ease;
}} }}
.info-panel.collapsed {{
transform: translateX(calc(100% + 20px));
opacity: 0; pointer-events: none;
}}
.panel-open-btn {{
position: absolute; top: 10px; right: 10px; z-index: 1001;
width: 40px; height: 40px; border-radius: 8px;
background: white; border: none; cursor: pointer;
box-shadow: 0 2px 12px rgba(0,0,0,0.15);
font-size: 20px; display: flex; align-items: center; justify-content: center;
transition: opacity 0.3s ease;
}}
.panel-open-btn.hidden {{ opacity: 0; pointer-events: none; }}
.panel-close-btn {{
position: absolute; top: 8px; right: 8px;
width: 28px; height: 28px; border-radius: 6px;
background: none; border: 1px solid #ddd; cursor: pointer;
font-size: 16px; display: flex; align-items: center; justify-content: center;
color: #888;
}}
.panel-close-btn:hover {{ background: #f0f0f0; color: #333; }}
.info-panel h2 {{ font-size: 16px; margin-bottom: 8px; }} .info-panel h2 {{ font-size: 16px; margin-bottom: 8px; }}
.info-panel .stats {{ color: #666; margin-bottom: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee; }} .info-panel .stats {{ color: #666; margin-bottom: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee; }}
.filter-section {{ margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; }} .filter-section {{ margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; }}
@@ -600,26 +559,18 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
.filter-section input[type="checkbox"] {{ accent-color: #1976D2; }} .filter-section input[type="checkbox"] {{ accent-color: #1976D2; }}
#floor-filter {{ margin-top: 8px; }} #floor-filter {{ margin-top: 8px; }}
#floor-filter select {{ width: 100%; padding: 4px; border-radius: 4px; border: 1px solid #ccc; }} #floor-filter select {{ width: 100%; padding: 4px; border-radius: 4px; border: 1px solid #ccc; }}
.status-link {{ display: block; margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; text-align: center; }}
.status-link a {{ color: #1976D2; text-decoration: none; font-size: 12px; }}
@media (max-width: 600px) {{
.info-panel {{ max-width: calc(100vw - 60px); right: 10px; }}
.info-panel.collapsed {{ transform: translateX(calc(100% + 20px)); }}
.panel-close-btn {{ top: 6px; right: 6px; }}
}}
</style> </style>
</head> </head>
<body> <body>
<div id="map"></div> <div id="map"></div>
<button class="panel-open-btn hidden" id="panel-open-btn" onclick="togglePanel()">☰</button> <div class="info-panel">
<div class="info-panel" id="info-panel">
<button class="panel-close-btn" id="panel-close-btn" onclick="togglePanel()">✕</button>
<h2>Byty v Praze</h2> <h2>Byty v Praze</h2>
<div class="stats"> <div class="stats">
<div>Celkem: <b id="visible-count">{len(estates)}</b> bytů</div> <div>Celkem: <b id="visible-count">{len(estates)}</b> bytů</div>
<div>Cena: {min_price}{max_price}</div> <div>Cena: {min_price}{max_price}</div>
<div>Průměr: {avg_price}</div> <div>Průměr: {avg_price}</div>
</div> </div>
<div><b>Dispozice:</b></div>
{legend_items} {legend_items}
<div class="filter-section"> <div class="filter-section">
<b>Filtry:</b> <b>Filtry:</b>
@@ -643,6 +594,17 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
</select> </select>
</label> </label>
</div> </div>
<div style="margin-top:6px;">
<label>Přidáno:
<select id="first-seen-filter" onchange="applyFilters()">
<option value="all">Vše</option>
<option value="1">Posledních 24h</option>
<option value="3">Poslední 3 dny</option>
<option value="7">Poslední týden</option>
<option value="14">Posledních 14 dní</option>
</select>
</label>
</div>
</div> </div>
<div class="filter-section"> <div class="filter-section">
<div id="rating-counts" style="margin-bottom:6px;font-size:12px;color:#666;"> <div id="rating-counts" style="margin-bottom:6px;font-size:12px;color:#666;">
@@ -653,7 +615,6 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
Skrýt zamítnuté Skrýt zamítnuté
</label> </label>
</div> </div>
<div class="status-link"><a href="status.html">Scraper status</a></div>
</div> </div>
<script> <script>
@@ -675,7 +636,7 @@ L.tileLayer('https://{{s}}.basemaps.cartocdn.com/light_only_labels/{{z}}/{{x}}/{
var allMarkers = []; var allMarkers = [];
function addMarker(lat, lon, color, popup, hashId) {{ function addMarker(lat, lon, color, popup, hashId, firstSeen) {{
var marker = L.circleMarker([lat, lon], {{ var marker = L.circleMarker([lat, lon], {{
radius: 8, radius: 8,
fillColor: color, fillColor: color,
@@ -684,28 +645,11 @@ function addMarker(lat, lon, color, popup, hashId) {{
opacity: 1, opacity: 1,
fillOpacity: 0.85, fillOpacity: 0.85,
}}).bindPopup(popup); }}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId }}; marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, firstSeen: firstSeen }};
allMarkers.push(marker); allMarkers.push(marker);
marker.addTo(map); marker.addTo(map);
}} }}
function addNewMarker(lat, lon, color, popup, hashId) {{
var marker = L.circleMarker([lat, lon], {{
radius: 12,
fillColor: color,
color: color,
weight: 4,
opacity: 0.35,
fillOpacity: 0.95,
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isNew: true }};
allMarkers.push(marker);
marker.addTo(map);
marker.on('add', function() {{
if (marker._path) marker._path.classList.add('marker-new');
}});
}}
function heartIcon(color) {{ function heartIcon(color) {{
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">' var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">'
+ '<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 ' + '<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
@@ -721,26 +665,11 @@ function heartIcon(color) {{
}}); }});
}} }}
function starIcon() {{ function addHeartMarker(lat, lon, color, popup, hashId, firstSeen) {{
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" viewBox="0 0 24 24">'
+ '<path d="M12 2l3.09 6.26L22 9.27l-5 4.87L18.18 22 12 18.27 '
+ '5.82 22 7 14.14 2 9.27l6.91-1.01L12 2z" '
+ 'fill="#FFC107" stroke="#F57F17" stroke-width="1" '
+ 'filter="drop-shadow(0 1px 3px rgba(0,0,0,0.3))"/></svg>';
return L.divIcon({{
html: svg,
className: 'star-icon',
iconSize: [28, 28],
iconAnchor: [14, 14],
popupAnchor: [0, -14],
}});
}}
function addHeartMarker(lat, lon, color, popup, hashId) {{
var marker = L.marker([lat, lon], {{ var marker = L.marker([lat, lon], {{
icon: heartIcon(color), icon: heartIcon(color),
}}).bindPopup(popup); }}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true }}; marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true, firstSeen: firstSeen }};
allMarkers.push(marker); allMarkers.push(marker);
marker.addTo(map); marker.addTo(map);
}} }}
@@ -761,36 +690,6 @@ function saveRatings(ratings) {{
localStorage.setItem(RATINGS_KEY, JSON.stringify(ratings)); localStorage.setItem(RATINGS_KEY, JSON.stringify(ratings));
}} }}
function addRejectStrike(marker) {{
removeRejectStrike(marker);
var color = marker._data.color || '#999';
// SVG "no entry" icon — circle with diagonal line, colored to match marker
var svg = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20">'
+ '<circle cx="12" cy="12" r="10" fill="none" stroke="' + color + '" stroke-width="2.5" opacity="0.85"/>'
+ '<line x1="5.5" y1="5.5" x2="18.5" y2="18.5" stroke="' + color + '" stroke-width="2.5" stroke-linecap="round" opacity="0.85"/>'
+ '</svg>';
var icon = L.divIcon({{
className: 'reject-overlay',
html: svg,
iconSize: [20, 20],
iconAnchor: [10, 10],
}});
var m = L.marker([marker._data.lat, marker._data.lon], {{
icon: icon,
interactive: false,
pane: 'markerPane',
}});
m.addTo(map);
marker._rejectStrike = m;
}}
function removeRejectStrike(marker) {{
if (marker._rejectStrike) {{
map.removeLayer(marker._rejectStrike);
marker._rejectStrike = null;
}}
}}
function applyMarkerStyle(marker, status) {{ function applyMarkerStyle(marker, status) {{
if (marker._data.isHeart) {{ if (marker._data.isHeart) {{
var el = marker._icon; var el = marker._icon;
@@ -805,33 +704,16 @@ function applyMarkerStyle(marker, status) {{
}} }}
}} else {{ }} else {{
if (status === 'fav') {{ if (status === 'fav') {{
removeRejectStrike(marker);
if (!marker._data._origCircle) marker._data._origCircle = true;
var popup = marker.getPopup();
var popupContent = popup ? popup.getContent() : '';
var wasOnMap = map.hasLayer(marker);
if (wasOnMap) map.removeLayer(marker);
var starMarker = L.marker([marker._data.lat, marker._data.lon], {{
icon: starIcon(),
}}).bindPopup(popupContent);
starMarker._data = marker._data;
var idx = allMarkers.indexOf(marker);
if (idx !== -1) allMarkers[idx] = starMarker;
if (wasOnMap) starMarker.addTo(map);
}} else if (status === 'reject') {{
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
revertToCircle(marker, {{ radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1 }});
}} else {{
marker.setStyle({{ marker.setStyle({{
radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1, radius: 12, fillOpacity: 1, weight: 3,
fillColor: marker._data.color, color: '#fff',
}});
if (marker._path) marker._path.classList.add('marker-favorite');
}} else if (status === 'reject') {{
marker.setStyle({{
radius: 6, fillOpacity: 0.15, fillColor: '#999', color: '#bbb', weight: 1,
}}); }});
if (marker._path) marker._path.classList.remove('marker-favorite'); if (marker._path) marker._path.classList.remove('marker-favorite');
}}
// Add strikethrough line over the marker
addRejectStrike(marker);
}} else {{
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
revertToCircle(marker, {{ radius: 8, fillColor: marker._data.color, color: '#fff', weight: 2, fillOpacity: 0.85 }});
}} else {{ }} else {{
marker.setStyle({{ marker.setStyle({{
radius: 8, fillColor: marker._data.color, color: '#fff', radius: 8, fillColor: marker._data.color, color: '#fff',
@@ -839,23 +721,7 @@ function applyMarkerStyle(marker, status) {{
}}); }});
if (marker._path) marker._path.classList.remove('marker-favorite'); if (marker._path) marker._path.classList.remove('marker-favorite');
}} }}
if (marker._path) marker._path.classList.remove('marker-rejected');
removeRejectStrike(marker);
}} }}
}}
}}
function revertToCircle(marker, style) {{
var popup = marker.getPopup();
var popupContent = popup ? popup.getContent() : '';
var wasOnMap = map.hasLayer(marker);
if (wasOnMap) map.removeLayer(marker);
var cm = L.circleMarker([marker._data.lat, marker._data.lon], style).bindPopup(popupContent);
cm._data = marker._data;
delete cm._data._starRef;
var idx = allMarkers.indexOf(marker);
if (idx !== -1) allMarkers[idx] = cm;
if (wasOnMap) cm.addTo(map);
}} }}
function rateMarker(marker, action) {{ function rateMarker(marker, action) {{
@@ -994,13 +860,25 @@ map.on('popupopen', function(e) {{
}}); }});
// ── Filters ──────────────────────────────────────────────────── // ── Filters ────────────────────────────────────────────────────
function daysAgoDate(days) {{
var d = new Date();
d.setDate(d.getDate() - days);
return d.toISOString().slice(0, 10);
}}
function applyFilters() {{ function applyFilters() {{
var minFloor = parseInt(document.getElementById('min-floor').value); var minFloor = parseInt(document.getElementById('min-floor').value);
var maxPrice = parseInt(document.getElementById('max-price').value); var maxPrice = parseInt(document.getElementById('max-price').value);
var hideRejected = document.getElementById('hide-rejected').checked; var hideRejected = document.getElementById('hide-rejected').checked;
var firstSeenVal = document.getElementById('first-seen-filter').value;
var ratings = loadRatings(); var ratings = loadRatings();
var visible = 0; var visible = 0;
var minFirstSeen = '';
if (firstSeenVal !== 'all') {{
minFirstSeen = daysAgoDate(parseInt(firstSeenVal));
}}
allMarkers.forEach(function(m) {{ allMarkers.forEach(function(m) {{
var popup = m.getPopup().getContent(); var popup = m.getPopup().getContent();
var floorMatch = popup.match(/(\\d+)\\. NP/); var floorMatch = popup.match(/(\\d+)\\. NP/);
@@ -1013,18 +891,19 @@ function applyFilters() {{
if (floor !== null && floor < minFloor) show = false; if (floor !== null && floor < minFloor) show = false;
if (price > maxPrice) show = false; if (price > maxPrice) show = false;
// Date filter
if (minFirstSeen && m._data.firstSeen) {{
if (m._data.firstSeen < minFirstSeen) show = false;
}}
var r = ratings[m._data.hashId]; var r = ratings[m._data.hashId];
if (hideRejected && r && r.status === 'reject') show = false; if (hideRejected && r && r.status === 'reject') show = false;
if (show) {{ if (show) {{
if (!map.hasLayer(m)) m.addTo(map); if (!map.hasLayer(m)) m.addTo(map);
visible++; visible++;
// Show strike line if rejected and visible
if (m._rejectStrike && !map.hasLayer(m._rejectStrike)) m._rejectStrike.addTo(map);
}} else {{ }} else {{
if (map.hasLayer(m)) map.removeLayer(m); if (map.hasLayer(m)) map.removeLayer(m);
// Hide strike line when marker hidden
if (m._rejectStrike && map.hasLayer(m._rejectStrike)) map.removeLayer(m._rejectStrike);
}} }}
}}); }});
@@ -1042,26 +921,6 @@ function applyFilters() {{
// Initialize ratings on load // Initialize ratings on load
restoreRatings(); restoreRatings();
// ── Panel toggle ──────────────────────────────────────────────
function togglePanel() {{
var panel = document.getElementById('info-panel');
var openBtn = document.getElementById('panel-open-btn');
var isOpen = !panel.classList.contains('collapsed');
if (isOpen) {{
panel.classList.add('collapsed');
openBtn.classList.remove('hidden');
}} else {{
panel.classList.remove('collapsed');
openBtn.classList.add('hidden');
}}
}}
// On mobile, start with panel collapsed
if (window.innerWidth <= 600) {{
document.getElementById('info-panel').classList.add('collapsed');
document.getElementById('panel-open-btn').classList.remove('hidden');
}}
</script> </script>
</body> </body>
</html>""" </html>"""

View File

@@ -7,13 +7,13 @@ Výstup: byty_bezrealitky.json
from __future__ import annotations from __future__ import annotations
import argparse import argparse
from datetime import datetime
import json import json
import logging import logging
import math import math
import re import re
import time import time
import urllib.request import urllib.request
from datetime import datetime
from pathlib import Path from pathlib import Path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -285,10 +285,14 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
# Check cache — if hash_id exists and price unchanged, reuse # Check cache — if hash_id exists and price unchanged, reuse
adv_id = int(adv["id"]) adv_id = int(adv["id"])
adv_price = adv.get("price", 0) or 0 adv_price = adv.get("price", 0) or 0
today = datetime.now().strftime("%Y-%m-%d")
cached = cache.get(adv_id) cached = cache.get(adv_id)
if cached and cached.get("price") == adv_price: if cached and cached.get("price") == adv_price:
cache_hits += 1 cache_hits += 1
logger.debug(f"Cache hit for id={adv_id}") logger.debug(f"Cache hit for id={adv_id}")
cached["last_updated"] = today
if "first_seen" not in cached:
cached["first_seen"] = today
results.append(cached) results.append(cached)
continue continue
@@ -340,6 +344,11 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
if not address: if not address:
address = adv.get('address({"locale":"CS"})', "Praha") address = adv.get('address({"locale":"CS"})', "Praha")
# Preserve first_seen from cache if this is a price-changed re-fetch
first_seen = today
if cached and "first_seen" in cached:
first_seen = cached["first_seen"]
result = { result = {
"hash_id": int(adv["id"]), "hash_id": int(adv["id"]),
"name": f"Prodej bytu {DISPOSITION_LABELS.get(disp, '?')} {adv.get('surface', '?')}", "name": f"Prodej bytu {DISPOSITION_LABELS.get(disp, '?')} {adv.get('surface', '?')}",
@@ -356,7 +365,8 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
"url": f"{BASE_URL}/nemovitosti-byty-domy/{uri}", "url": f"{BASE_URL}/nemovitosti-byty-domy/{uri}",
"source": "bezrealitky", "source": "bezrealitky",
"image": "", "image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"), "first_seen": first_seen,
"last_updated": today,
} }
results.append(result) results.append(result)
properties_fetched += 1 properties_fetched += 1

View File

@@ -34,26 +34,24 @@ HEADERS = {
BASE_URL = "https://www.city-home.cz" BASE_URL = "https://www.city-home.cz"
def fetch_url(url: str, retries: int = 3) -> str: def fetch_url(url: str) -> str:
"""Fetch URL and return HTML string. Raises HTTPError on 4xx/5xx.""" """Fetch URL and return HTML string."""
for attempt in range(retries): for attempt in range(3):
try: try:
logger.debug(f"HTTP GET request (attempt {attempt + 1}/{retries}): {url}") logger.debug(f"HTTP GET request (attempt {attempt + 1}/3): {url}")
logger.debug(f"Headers: {HEADERS}")
req = urllib.request.Request(url, headers=HEADERS) req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30) resp = urllib.request.urlopen(req, timeout=30)
html = resp.read().decode("utf-8") html = resp.read().decode("utf-8")
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes") logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
return html return html
except urllib.error.HTTPError:
# Don't retry on HTTP errors (404, 403, etc.) — re-raise immediately
raise
except (ConnectionResetError, ConnectionError, urllib.error.URLError) as e: except (ConnectionResetError, ConnectionError, urllib.error.URLError) as e:
if attempt < retries - 1: if attempt < 2:
wait = (attempt + 1) * 2 wait = (attempt + 1) * 2
logger.warning(f"Connection error (retry {attempt + 1}/{retries} after {wait}s): {e}") logger.warning(f"Connection error (retry {attempt + 1}/3 after {wait}s): {e}")
time.sleep(wait) time.sleep(wait)
else: else:
logger.error(f"HTTP request failed after {retries} attempts: {e}", exc_info=True) logger.error(f"HTTP request failed after 3 attempts: {e}", exc_info=True)
raise raise
@@ -127,21 +125,31 @@ def parse_filter_page(html: str) -> list[dict]:
if detail_url and not detail_url.startswith("http"): if detail_url and not detail_url.startswith("http"):
detail_url = BASE_URL + detail_url detail_url = BASE_URL + detail_url
# Parse table cells: [unit_name, unit_type_label, address, floor, disposition, area, transaction, price] # Extract floor from cells — look for pattern like "3.NP" or "2.PP"
cells = re.findall(r'<td[^>]*>(.*?)</td>', row_content, re.DOTALL) cells = re.findall(r'<td[^>]*>(.*?)</td>', row_content, re.DOTALL)
cell_texts = [re.sub(r'<[^>]+>', '', c).strip() for c in cells]
# Cell[2] = address (e.g. "Žateckých 14"), cell[3] = floor (e.g. "3.NP")
project_address = cell_texts[2] if len(cell_texts) > 2 else ""
floor = None floor = None
if len(cell_texts) > 3: floor_text = ""
np_match = re.search(r'(\d+)\.\s*NP', cell_texts[3]) project_name = ""
pp_match = re.search(r'(\d+)\.\s*PP', cell_texts[3])
for cell in cells:
cell_text = re.sub(r'<[^>]+>', '', cell).strip()
# Floor pattern
np_match = re.search(r'(\d+)\.\s*NP', cell_text)
pp_match = re.search(r'(\d+)\.\s*PP', cell_text)
if np_match: if np_match:
floor = int(np_match.group(1)) floor = int(np_match.group(1))
floor_text = cell_text
elif pp_match: elif pp_match:
floor = -int(pp_match.group(1)) floor = -int(pp_match.group(1)) # Underground
floor_text = cell_text
# Extract project name — usually in a cell that's not a number/price/floor
for cell in cells:
cell_text = re.sub(r'<[^>]+>', '', cell).strip()
if cell_text and not re.match(r'^[\d\s.,]+$', cell_text) and "NP" not in cell_text and "PP" not in cell_text and "" not in cell_text and "" not in cell_text and "EUR" not in cell_text and "CZK" not in cell_text:
if len(cell_text) > 3 and cell_text != unit_name:
project_name = cell_text
break
listing = { listing = {
"price": int(cena.group(1)), "price": int(cena.group(1)),
@@ -151,58 +159,43 @@ def parse_filter_page(html: str) -> list[dict]:
"project_id": project.group(1) if project else "", "project_id": project.group(1) if project else "",
"transaction": transaction.group(1) if transaction else "", "transaction": transaction.group(1) if transaction else "",
"disposition": dispozition.group(1) if dispozition else "", "disposition": dispozition.group(1) if dispozition else "",
"location": location.group(1) if location else "",
"url": detail_url, "url": detail_url,
"unit_name": unit_name, "unit_name": unit_name,
"floor": floor, "floor": floor,
"project_address": project_address, "project_name": project_name,
} }
listings.append(listing) listings.append(listing)
return listings return listings
def get_lokalita_urls(slug: str) -> list[str]: def extract_project_gps(html: str) -> dict[str, tuple[float, float]]:
"""Return candidate lokalita URLs to try in order.""" """Extract GPS coordinates for projects from locality pages."""
return [ # Pattern in JS: ['<h4>Project Name</h4>...', 'LAT', 'LON', '1', 'Name']
f"{BASE_URL}/projekty/{slug}/lokalita", gps_data = {}
f"{BASE_URL}/bytove-domy/{slug}/lokalita", for match in re.finditer(r"\['[^']*<h4>([^<]+)</h4>[^']*',\s*'([\d.]+)',\s*'([\d.]+)'", html):
f"{BASE_URL}/bytove-domy/{slug}/lokalita1", name = match.group(1).strip()
] lat = float(match.group(2))
lon = float(match.group(3))
gps_data[name] = (lat, lon)
return gps_data
def extract_project_gps(html: str) -> tuple[float, float] | None: def load_previous(json_path: str = "byty_cityhome.json") -> dict[str, str]:
"""Extract project GPS from lokalita page JS variable. """Load first_seen dates from previous run, keyed by hash_id."""
path = Path(json_path)
The page contains: var locations = [['<h4>Name</h4>...', 'LAT', 'LNG', 'CATEGORY', 'Label'], ...] if not path.exists():
Category '1' = the project's own marker. Some projects have two cat-1 entries (data error); return {}
in that case we pick the one whose name contains a digit and is not a transit landmark. try:
""" data = json.loads(path.read_text(encoding="utf-8"))
block = re.search(r'var locations\s*=\s*\[(.*?)\];', html, re.DOTALL) return {str(e["hash_id"]): e.get("first_seen", "") for e in data if "hash_id" in e}
if not block: except (json.JSONDecodeError, KeyError):
return None return {}
entries = re.findall(
r"'<h4>(.*?)</h4>.*?',\s*'([\d.]+)',\s*'([\d.]+)',\s*'1'",
block.group(0),
re.DOTALL,
)
if not entries:
return None
if len(entries) == 1:
return float(entries[0][1]), float(entries[0][2])
# Multiple cat-1 entries: pick the real project marker
transit_re = re.compile(r'nádraží|park|metro|tramvaj|autobus|zastávka', re.IGNORECASE)
for name, lat, lng in entries:
if re.search(r'\d', name) and not transit_re.search(name):
return float(lat), float(lng)
# Fallback: first entry
return float(entries[0][1]), float(entries[0][2])
def scrape(max_pages: int | None = None, max_properties: int | None = None): def scrape(max_pages: int | None = None, max_properties: int | None = None):
previous_first_seen = load_previous()
logger.info("=" * 60) logger.info("=" * 60)
logger.info("Stahuji inzeráty z CityHome (city-home.cz)") logger.info("Stahuji inzeráty z CityHome (city-home.cz)")
logger.info(f"Cena: do {format_price(MAX_PRICE)}") logger.info(f"Cena: do {format_price(MAX_PRICE)}")
@@ -231,24 +224,22 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
# Fetch GPS for each project from locality pages # Fetch GPS for each project from locality pages
project_gps = {} project_gps = {}
for slug in sorted(project_slugs): for slug in sorted(project_slugs):
time.sleep(0.3) time.sleep(0.5)
gps = None
for url in get_lokalita_urls(slug):
try: try:
logger.debug(f"Fetching project GPS: {url}") locality_url = f"{BASE_URL}/projekty/{slug}/lokalita"
loc_html = fetch_url(url) logger.debug(f"Fetching project GPS: {locality_url}")
loc_html = fetch_url(locality_url)
gps = extract_project_gps(loc_html) gps = extract_project_gps(loc_html)
if gps: if gps:
break # Take first entry (the project itself)
except Exception as e: first_name, (lat, lon) = next(iter(gps.items()))
logger.debug(f"GPS fetch failed for {url}: {e}") project_gps[slug] = (lat, lon)
continue logger.info(f"{slug}: {lat}, {lon}")
if gps:
project_gps[slug] = gps
logger.info(f"{slug}: {gps[0]}, {gps[1]}")
else: else:
logger.info(f"{slug}: GPS nenalezeno") logger.info(f"{slug}: GPS nenalezeno")
except Exception as e:
logger.warning(f"Error fetching GPS for {slug}: {e}", exc_info=True)
logger.info(f"{slug}: chyba ({e})")
# Step 3: Filter listings # Step 3: Filter listings
logger.info(f"\nFáze 3: Filtrování...") logger.info(f"\nFáze 3: Filtrování...")
@@ -326,37 +317,28 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
lat, lon = gps lat, lon = gps
# locality: use project address from cell (e.g. "Žateckých 14") + city from GPS lookup today = datetime.now().strftime("%Y-%m-%d")
project_address = listing.get("project_address", "") hash_id = f"cityhome_{slug}_{listing['unit_name']}"
# derive city from slug (GPS lookup key) first_seen = previous_first_seen.get(str(hash_id), "") or today
city_map = {
"karlinske-namesti-5": "Praha 8",
"melnicka-12": "Praha 7",
"na-vaclavce-34": "Praha 5",
"nad-kajetankou-12": "Praha 6",
"vosmikovych-3": "Praha 9",
"zateckych-14": "Praha 2",
}
city_str = city_map.get(slug, "Praha")
locality_str = f"{project_address}, {city_str}" if project_address else city_str
result = { result = {
"hash_id": f"cityhome_{slug}_{listing['unit_name']}", "hash_id": hash_id,
"name": f"Prodej bytu {disp}, {int(area)} m² — {project_address}", "name": f"Prodej bytu {disp} {area} m² — {listing['project_name']}",
"price": price, "price": price,
"price_formatted": format_price(price), "price_formatted": format_price(price),
"locality": locality_str, "locality": f"{listing['project_name']}, Praha",
"lat": lat, "lat": lat,
"lon": lon, "lon": lon,
"disposition": disp, "disposition": disp,
"floor": floor, "floor": floor,
"area": float(area), "area": area,
"building_type": "Cihlová", # CityHome renovuje cihlové domy "building_type": "Cihlová", # CityHome renovuje cihlové domy
"ownership": "neuvedeno", "ownership": "neuvedeno",
"url": url, "url": url,
"source": "cityhome", "source": "cityhome",
"image": "", "image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"), "first_seen": first_seen,
"last_updated": today,
} }
results.append(result) results.append(result)
properties_fetched += 1 properties_fetched += 1

View File

@@ -7,7 +7,6 @@ Výstup: byty_idnes.json
from __future__ import annotations from __future__ import annotations
import argparse import argparse
from datetime import datetime
import json import json
import logging import logging
import math import math
@@ -15,6 +14,7 @@ import re
import time import time
import urllib.request import urllib.request
import urllib.parse import urllib.parse
from datetime import datetime
from html.parser import HTMLParser from html.parser import HTMLParser
from pathlib import Path from pathlib import Path
@@ -379,10 +379,14 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
logger.debug(f"Max properties limit reached: {max_properties}") logger.debug(f"Max properties limit reached: {max_properties}")
break break
# Check cache — if hash_id exists and price unchanged, reuse # Check cache — if hash_id exists and price unchanged, reuse
today = datetime.now().strftime("%Y-%m-%d")
cached = cache.get(str(item["id"])) cached = cache.get(str(item["id"]))
if cached and cached.get("price") == item["price"]: if cached and cached.get("price") == item["price"]:
cache_hits += 1 cache_hits += 1
logger.debug(f"Cache hit for id={item['id']}") logger.debug(f"Cache hit for id={item['id']}")
cached["last_updated"] = today
if "first_seen" not in cached:
cached["first_seen"] = today
results.append(cached) results.append(cached)
continue continue
@@ -443,6 +447,11 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
else: else:
building_type = construction.capitalize() building_type = construction.capitalize()
# Preserve first_seen from cache if this is a price-changed re-fetch
first_seen = today
if cached and "first_seen" in cached:
first_seen = cached["first_seen"]
result = { result = {
"hash_id": item["id"], "hash_id": item["id"],
"name": f"Prodej bytu {item['disposition']} {item.get('area', '?')}", "name": f"Prodej bytu {item['disposition']} {item.get('area', '?')}",
@@ -459,7 +468,8 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
"url": item["url"], "url": item["url"],
"source": "idnes", "source": "idnes",
"image": "", "image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"), "first_seen": first_seen,
"last_updated": today,
} }
results.append(result) results.append(result)
properties_fetched += 1 properties_fetched += 1

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
PSN.cz scraper. PSN.cz scraper.
Stáhne byty na prodej z API /api/units-list — jeden požadavek, žádné stránkování. Stáhne byty na prodej v Praze z projektů PSN a vyfiltruje podle kritérií.
Výstup: byty_psn.json Výstup: byty_psn.json
""" """
from __future__ import annotations from __future__ import annotations
@@ -14,7 +14,6 @@ import subprocess
import time import time
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from urllib.parse import urlencode
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -24,37 +23,82 @@ MAX_PRICE = 14_000_000
MIN_AREA = 69 MIN_AREA = 69
MIN_FLOOR = 2 MIN_FLOOR = 2
WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1", "5+kk a větší"} WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1"}
# Pouze Praha — ostatní města (Brno, Pardubice, Špindlerův Mlýn) přeskočit
WANTED_CITIES = {"Praha"}
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
BASE_URL = "https://psn.cz" BASE_URL = "https://psn.cz"
UNITS_API = f"{BASE_URL}/api/units-list"
# Known Prague project slugs with GPS (from research)
PRAGUE_PROJECTS = [
{"slug": "zit-branik", "name": "Žít Braník", "lat": 50.0353, "lon": 14.4125},
{"slug": "rostislavova-4", "name": "Rostislavova 4", "lat": 50.0620, "lon": 14.4463},
{"slug": "pod-drinopolem", "name": "Pod Drinopolem", "lat": 50.0851, "lon": 14.3720},
{"slug": "skyline-chodov", "name": "Skyline Chodov", "lat": 50.0418, "lon": 14.4990},
{"slug": "jitro", "name": "Jitro", "lat": 50.0729, "lon": 14.4768},
{"slug": "maroldka", "name": "Maroldka", "lat": 50.0614, "lon": 14.4517},
{"slug": "belehradska-29", "name": "Bělehradská 29", "lat": 50.0682, "lon": 14.4348},
{"slug": "jeseniova-93", "name": "Jeseniova 93", "lat": 50.0887, "lon": 14.4692},
{"slug": "vanguard", "name": "Vanguard", "lat": 50.0164, "lon": 14.4036},
{"slug": "vinohradska-160", "name": "Vinohradská 160", "lat": 50.0780, "lon": 14.4653},
{"slug": "hermanova24", "name": "Heřmanova 24", "lat": 50.1009, "lon": 14.4313},
{"slug": "vinohradska-8", "name": "Vinohradská 8", "lat": 50.0787, "lon": 14.4342},
{"slug": "bydleni-na-vysinach", "name": "Bydlení Na Výšinách", "lat": 50.1003, "lon": 14.4187},
{"slug": "bydleni-u-pekaren", "name": "Bydlení U Pekáren", "lat": 50.0555, "lon": 14.5414},
{"slug": "pechackova-6", "name": "Pechackova 6", "lat": 50.0734, "lon": 14.4063},
{"slug": "ahoj-vanguard", "name": "Ahoj Vanguard", "lat": 50.0164, "lon": 14.4033},
]
def fetch_json(url: str) -> dict: def fetch_url(url: str) -> str:
"""Fetch JSON via curl (urllib SSL may fail on Cloudflare).""" """Fetch URL via curl (urllib SSL too old for Cloudflare)."""
logger.debug(f"HTTP GET: {url}") logger.debug(f"HTTP GET request (via curl): {url}")
logger.debug(f"User-Agent: {UA}")
result = subprocess.run( result = subprocess.run(
["curl", "-s", "-L", "--max-time", "30", ["curl", "-s", "-L", "--max-time", "30",
"-H", f"User-Agent: {UA}", "-H", f"User-Agent: {UA}",
"-H", "Accept: application/json", "-H", "Accept: text/html",
url], url],
capture_output=True, text=True, timeout=60 capture_output=True, text=True, timeout=60
) )
if result.returncode != 0: if result.returncode != 0:
logger.error(f"curl failed (return code {result.returncode}): {result.stderr[:200]}")
raise RuntimeError(f"curl failed ({result.returncode}): {result.stderr[:200]}") raise RuntimeError(f"curl failed ({result.returncode}): {result.stderr[:200]}")
return json.loads(result.stdout) logger.debug(f"HTTP response: size={len(result.stdout)} bytes")
return result.stdout
def fix_gps(lat, lng): def extract_units_from_html(html: str) -> list[dict]:
"""PSN má u některých projektů prohozené lat/lng — opravíme.""" """Extract unit JSON objects from raw HTML with escaped quotes."""
if lat is not None and lng is not None and lat < 20 and lng > 20: # The HTML contains RSC data with escaped JSON: \\"key\\":\\"value\\"
return lng, lat # Step 1: Unescape the double-backslash-quotes to regular quotes
return lat, lng cleaned = html.replace('\\"', '"')
# Step 2: Find each unit by looking for "title":"Byt and walking back to {
units = []
decoder = json.JSONDecoder()
for m in re.finditer(r'"title":"Byt', cleaned):
pos = m.start()
# Walk backwards to find the opening brace
depth = 0
found = False
for i in range(pos - 1, max(pos - 3000, 0), -1):
if cleaned[i] == '}':
depth += 1
elif cleaned[i] == '{':
if depth == 0:
try:
obj, end = decoder.raw_decode(cleaned, i)
if isinstance(obj, dict) and 'price_czk' in obj:
units.append(obj)
found = True
except (json.JSONDecodeError, ValueError):
pass
break
depth -= 1
return units
def format_price(price: int) -> str: def format_price(price: int) -> str:
@@ -66,178 +110,228 @@ def format_price(price: int) -> str:
return " ".join(reversed(parts)) + "" return " ".join(reversed(parts)) + ""
def scrape(max_properties: int | None = None): def load_previous(json_path: str = "byty_psn.json") -> dict[str, str]:
"""Load first_seen dates from previous run, keyed by hash_id."""
path = Path(json_path)
if not path.exists():
return {}
try:
data = json.loads(path.read_text(encoding="utf-8"))
return {str(e["hash_id"]): e.get("first_seen", "") for e in data if "hash_id" in e}
except (json.JSONDecodeError, KeyError):
return {}
def scrape(max_pages: int | None = None, max_properties: int | None = None):
previous_first_seen = load_previous()
logger.info("=" * 60) logger.info("=" * 60)
logger.info("Stahuji inzeráty z PSN.cz") logger.info("Stahuji inzeráty z PSN.cz")
logger.info(f"Cena: do {format_price(MAX_PRICE)}") logger.info(f"Cena: do {format_price(MAX_PRICE)}")
logger.info(f"Min. plocha: {MIN_AREA}") logger.info(f"Min. plocha: {MIN_AREA}")
logger.info(f"Patro: od {MIN_FLOOR}. NP") logger.info(f"Patro: od {MIN_FLOOR}. NP")
logger.info(f"Region: Praha") logger.info(f"Region: Praha ({len(PRAGUE_PROJECTS)} projektů)")
if max_pages:
logger.info(f"Max. stran: {max_pages}")
if max_properties: if max_properties:
logger.info(f"Max. bytů: {max_properties}") logger.info(f"Max. bytů: {max_properties}")
logger.info("=" * 60) logger.info("=" * 60)
# Jediný API požadavek — vrátí všechny jednotky (cca 236) # Fetch units from each Prague project
params = urlencode({ all_units = []
"locale": "cs",
"filters": "{}", for proj in PRAGUE_PROJECTS:
"type": "list", page = 1
"order": "price-asc", project_units = []
"offset": 0,
"limit": 500, while True:
}) if max_pages and page > max_pages:
url = f"{UNITS_API}?{params}" logger.debug(f"Max pages limit reached: {max_pages}")
logger.info("Stahuji jednotky z API ...") break
url = f"{BASE_URL}/projekt/{proj['slug']}?page={page}"
logger.info(f"{proj['name']} — strana {page} ...")
time.sleep(0.5)
try: try:
data = fetch_json(url) html = fetch_url(url)
except Exception as e: except Exception as e:
logger.error(f"Chyba při stahování: {e}", exc_info=True) logger.error(f"Fetch error for {proj['name']}: {e}", exc_info=True)
return []
all_units = data.get("units", {}).get("data", [])
logger.info(f"Staženo jednotek celkem: {len(all_units)}")
# Filtrování
results = []
excluded = {
"prodáno": 0,
"typ": 0,
"město": 0,
"dispozice": 0,
"cena": 0,
"plocha": 0,
"patro": 0,
}
properties_fetched = 0
for unit in all_units:
if max_properties and properties_fetched >= max_properties:
break break
unit_id = unit.get("id", "?") units = extract_units_from_html(html)
logger.debug(f"Project {proj['slug']} page {page}: extracted {len(units)} units")
# Pouze prodej bytů (type_id=0) if not units:
if unit.get("type_id") != 0: if page == 1:
excluded["typ"] += 1 logger.info(f"→ 0 jednotek")
logger.debug(f"id={unit_id}: přeskočen (type_id={unit.get('type_id')}, není prodej bytu)") break
continue
# Pouze volné (ne rezervované, prodané, v přípravě) # Add project info to each unit
sale_status = unit.get("sale_status", "") for unit in units:
if not unit.get("latitude") or not unit.get("longitude"):
unit["latitude"] = proj["lat"]
unit["longitude"] = proj["lon"]
unit["_project_name"] = proj["name"]
unit["_project_slug"] = proj["slug"]
project_units.extend(units)
if page == 1:
logger.info(f"{len(units)} jednotek na stránce")
# Check if there might be more pages
# If we got fewer than expected or same units, stop
if len(units) < 10:
break
page += 1
if page > 10: # Safety limit
break
all_units.extend(project_units)
# Deduplicate by slug
seen_slugs = set()
unique_units = []
for u in all_units:
slug = u.get("slug", "")
if slug and slug not in seen_slugs:
seen_slugs.add(slug)
unique_units.append(u)
elif not slug:
unique_units.append(u)
logger.info(f"\nStaženo celkem: {len(unique_units)} unikátních jednotek")
# Filter
logger.info(f"\nFiltrování...")
results = []
excluded_sold = 0
excluded_type = 0
excluded_disp = 0
excluded_price = 0
excluded_area = 0
excluded_floor = 0
excluded_panel = 0
properties_fetched = 0
for unit in unique_units:
if max_properties and properties_fetched >= max_properties:
logger.debug(f"Max properties limit reached: {max_properties}")
break
unit_id = unit.get("id", unit.get("slug", "unknown"))
# Only free units
is_free = unit.get("is_free", False) is_free = unit.get("is_free", False)
is_sold = unit.get("is_sold", False) is_sold = unit.get("is_sold", False)
if is_sold or not is_free: if is_sold or not is_free:
excluded["prodáno"] += 1 excluded_sold += 1
logger.debug(f"id={unit_id}: přeskočen (status={sale_status})") logger.debug(f"Filter: id={unit_id} - excluded (sold/not free)")
continue continue
# Pouze Praha # Only apartments
city = (unit.get("location") or unit.get("address", {}).get("city") or "").strip() category = str(unit.get("category", "")).lower()
# location field je typicky "Praha 4", "Praha 7" atd. if "byt" not in category and "ateliér" not in category:
city_base = city.split(" ")[0] if city else "" excluded_type += 1
if city_base not in WANTED_CITIES: logger.debug(f"Filter: id={unit_id} - excluded (not apartment, category={category})")
excluded["město"] += 1
logger.debug(f"id={unit_id}: přeskočen (město={city})")
continue continue
# Dispozice # Disposition
disp = unit.get("disposition", "") disp = unit.get("disposition", "")
if disp not in WANTED_DISPOSITIONS: if disp not in WANTED_DISPOSITIONS:
excluded["dispozice"] += 1 excluded_disp += 1
logger.debug(f"id={unit_id}: přeskočen (dispozice={disp})") logger.debug(f"Filter: id={unit_id} - excluded (disposition {disp})")
continue continue
# Cena # Price
price = unit.get("action_price_czk") or unit.get("price_czk") or 0 price = unit.get("price_czk") or unit.get("action_price_czk") or 0
if not price or price <= 0 or price > MAX_PRICE: if price <= 0 or price > MAX_PRICE:
excluded["cena"] += 1 excluded_price += 1
logger.debug(f"id={unit_id}: přeskočen (cena={price})") logger.debug(f"Filter: id={unit_id} - excluded (price {price})")
continue continue
# Plocha # Area
area = unit.get("total_area") or unit.get("floor_area") or 0 area = unit.get("total_area") or unit.get("floor_area") or 0
if area < MIN_AREA: if area < MIN_AREA:
excluded["plocha"] += 1 excluded_area += 1
logger.debug(f"id={unit_id}: přeskočen (plocha={area} m²)") logger.debug(f"Filter: id={unit_id} - excluded (area {area} m²)")
continue continue
# Patro # Floor
floor_str = str(unit.get("floor", "")) floor_str = str(unit.get("floor", ""))
floor = None floor = None
if floor_str: if floor_str:
try: try:
floor = int(floor_str) floor = int(floor_str)
except ValueError: except ValueError:
m = re.search(r'(-?\d+)', floor_str) floor_match = re.search(r'(-?\d+)', floor_str)
if m: if floor_match:
floor = int(m.group(1)) floor = int(floor_match.group(1))
if floor is not None and floor < MIN_FLOOR: if floor is not None and floor < MIN_FLOOR:
excluded["patro"] += 1 excluded_floor += 1
logger.debug(f"id={unit_id}: přeskočen (patro={floor})") logger.debug(f"Filter: id={unit_id} - excluded (floor {floor})")
continue continue
# GPS — opravit prohozené souřadnice # Construction — check for panel
lat_raw = unit.get("latitude") build_type = str(unit.get("build_type", "")).lower()
lng_raw = unit.get("longitude") if "panel" in build_type:
lat, lng = fix_gps(lat_raw, lng_raw) excluded_panel += 1
if not lat or not lng: logger.debug(f"Filter: id={unit_id} - excluded (panel construction)")
logger.warning(f"id={unit_id}: chybí GPS souřadnice, přeskakuji") logger.info(f"✗ Vyloučen: panel ({build_type})")
continue continue
# Sestavit adresu pro locality # Build construction label
addr = unit.get("address") or {} building_type = "neuvedeno"
street = addr.get("street", "") if build_type and build_type != "nevybráno":
street_no = addr.get("street_no", "") if "cihlo" in build_type or "cihla" in build_type:
if street and street_no: building_type = "Cihlová"
locality_str = f"{street} {street_no}, {city}" elif "skelet" in build_type:
elif street: building_type = "Skeletová"
locality_str = f"{street}, {city}"
else: else:
project_name = unit.get("project", "") building_type = build_type.capitalize()
locality_str = f"{project_name}, {city}" if project_name else city
# URL na detail jednotky lat = unit.get("latitude", 0)
unit_slug = unit.get("slug", "") lon = unit.get("longitude", 0)
project_slug = ""
# project_slug lze odvodit z projektu nebo z reference_no slug = unit.get("slug", "")
# API nevrací project_slug přímo — použijeme reference_no nebo jen ID project_slug = unit.get("_project_slug", "")
reference_no = unit.get("reference_no", "") detail_url = f"{BASE_URL}/projekt/{project_slug}/{slug}" if slug else f"{BASE_URL}/projekt/{project_slug}"
if unit_slug:
detail_url = f"{BASE_URL}/prodej/{unit_slug}" today = datetime.now().strftime("%Y-%m-%d")
elif reference_no: hash_id = unit.get("id", slug)
detail_url = f"{BASE_URL}/prodej/{reference_no}" first_seen = previous_first_seen.get(str(hash_id), "") or today
else:
detail_url = BASE_URL
result = { result = {
"hash_id": str(unit_id), "hash_id": hash_id,
"name": f"Prodej bytu {disp}, {int(area)} m² — {unit.get('project', locality_str)}", "name": f"Prodej bytu {disp} {area} m² — {unit.get('_project_name', '')}",
"price": int(price), "price": int(price),
"price_formatted": format_price(int(price)), "price_formatted": format_price(int(price)),
"locality": locality_str, "locality": f"{unit.get('street', unit.get('_project_name', ''))}, Praha",
"lat": lat, "lat": lat,
"lon": lng, "lon": lon,
"disposition": disp, "disposition": disp,
"floor": floor, "floor": floor,
"area": float(area), "area": area,
"building_type": "neuvedeno", "building_type": building_type,
"ownership": "osobní", "ownership": unit.get("ownership", "neuvedeno") or "neuvedeno",
"url": detail_url, "url": detail_url,
"source": "psn", "source": "psn",
"image": "", "image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"), "first_seen": first_seen,
"last_updated": today,
} }
results.append(result) results.append(result)
properties_fetched += 1 properties_fetched += 1
logger.info(f"\n{'=' * 60}") logger.info(f"\n{'=' * 60}")
logger.info(f"Výsledky PSN:") logger.info(f"Výsledky PSN:")
logger.info(f" Staženo jednotek: {len(all_units)}") logger.info(f" Celkem jednotek: {len(unique_units)}")
for reason, count in excluded.items(): logger.info(f" Vyloučeno (prodáno): {excluded_sold}")
if count: logger.info(f" Vyloučeno (typ): {excluded_type}")
logger.info(f" Vyloučeno ({reason}): {count}") logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
logger.info(f" Vyloučeno (cena): {excluded_price}")
logger.info(f" Vyloučeno (plocha): {excluded_area}")
logger.info(f" Vyloučeno (patro): {excluded_floor}")
logger.info(f" Vyloučeno (panel): {excluded_panel}")
logger.info(f" ✓ Vyhovující byty: {len(results)}") logger.info(f" ✓ Vyhovující byty: {len(results)}")
logger.info(f"{'=' * 60}") logger.info(f"{'=' * 60}")
@@ -246,13 +340,15 @@ def scrape(max_properties: int | None = None):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Scrape apartments from PSN.cz") parser = argparse.ArgumentParser(description="Scrape apartments from PSN.cz")
parser.add_argument("--max-pages", type=int, default=None,
help="Maximum number of listing pages per project to scrape")
parser.add_argument("--max-properties", type=int, default=None, parser.add_argument("--max-properties", type=int, default=None,
help="Maximum number of properties to include in results") help="Maximum number of properties to include in results")
parser.add_argument("--log-level", type=str, default="INFO", parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)") help="Logging level (default: INFO)")
args = parser.parse_args() args = parser.parse_args()
# Configure logging
logging.basicConfig( logging.basicConfig(
level=getattr(logging, args.log_level), level=getattr(logging, args.log_level),
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s", format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
@@ -260,7 +356,7 @@ if __name__ == "__main__":
) )
start = time.time() start = time.time()
estates = scrape(max_properties=args.max_properties) estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
if estates: if estates:
json_path = Path("byty_psn.json") json_path = Path("byty_psn.json")
@@ -270,6 +366,6 @@ if __name__ == "__main__":
) )
elapsed = time.time() - start elapsed = time.time() - start
logger.info(f"\n✓ Data uložena: {json_path.resolve()}") logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
logger.info(f"⏱ Celkový čas: {elapsed:.1f} s") logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
else: else:
logger.info("\nŽádné byty z PSN neodpovídají kritériím :(") logger.info("\nŽádné byty z PSN neodpovídají kritériím :(")

View File

@@ -7,13 +7,13 @@ Výstup: byty_realingo.json
from __future__ import annotations from __future__ import annotations
import argparse import argparse
from datetime import datetime
import json import json
import logging import logging
import math import math
import re import re
import time import time
import urllib.request import urllib.request
from datetime import datetime
from pathlib import Path from pathlib import Path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -239,10 +239,14 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
# Check cache — if hash_id exists and price unchanged, reuse # Check cache — if hash_id exists and price unchanged, reuse
item_id = int(item["id"]) item_id = int(item["id"])
item_price = item.get("price", {}).get("total", 0) or 0 item_price = item.get("price", {}).get("total", 0) or 0
today = datetime.now().strftime("%Y-%m-%d")
cached = cache.get(item_id) cached = cache.get(item_id)
if cached and cached.get("price") == item_price: if cached and cached.get("price") == item_price:
cache_hits += 1 cache_hits += 1
logger.debug(f"Cache hit for id={item_id}") logger.debug(f"Cache hit for id={item_id}")
cached["last_updated"] = today
if "first_seen" not in cached:
cached["first_seen"] = today
results.append(cached) results.append(cached)
continue continue
@@ -299,6 +303,11 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
cat = item.get("category", "") cat = item.get("category", "")
loc = item.get("location", {}) loc = item.get("location", {})
# Preserve first_seen from cache if this is a price-changed re-fetch
first_seen = today
if cached and "first_seen" in cached:
first_seen = cached["first_seen"]
result = { result = {
"hash_id": int(item["id"]), "hash_id": int(item["id"]),
"name": f"Prodej bytu {CATEGORY_LABELS.get(cat, '?')} {item.get('area', {}).get('main', '?')}", "name": f"Prodej bytu {CATEGORY_LABELS.get(cat, '?')} {item.get('area', {}).get('main', '?')}",
@@ -315,7 +324,8 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
"url": f"{BASE_URL}{item['url']}", "url": f"{BASE_URL}{item['url']}",
"source": "realingo", "source": "realingo",
"image": "", "image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"), "first_seen": first_seen,
"last_updated": today,
} }
results.append(result) results.append(result)
properties_fetched += 1 properties_fetched += 1

View File

@@ -1,204 +0,0 @@
<!DOCTYPE html>
<html lang="cs">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Scraper status</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: system-ui, -apple-system, sans-serif;
background: #f5f5f5; color: #333;
padding: 24px; max-width: 640px; margin: 0 auto;
}
h1 { font-size: 22px; margin-bottom: 4px; }
.subtitle { color: #888; font-size: 13px; margin-bottom: 24px; }
.card {
background: white; border-radius: 12px; padding: 20px;
box-shadow: 0 1px 4px rgba(0,0,0,0.08); margin-bottom: 16px;
}
.card h2 { font-size: 15px; margin-bottom: 12px; color: #555; }
.timestamp {
font-size: 28px; font-weight: 700; color: #1976D2;
}
.timestamp-ago { font-size: 13px; color: #999; margin-top: 2px; }
/* Source table */
.source-table { width: 100%; border-collapse: collapse; }
.source-table td { padding: 8px 0; border-bottom: 1px solid #f0f0f0; font-size: 14px; }
.source-table tr:last-child td { border-bottom: none; }
.source-table .name { font-weight: 600; }
.source-table .count { text-align: right; font-variant-numeric: tabular-nums; }
.source-table .rejected { text-align: right; color: #999; font-size: 12px; }
.badge {
display: inline-block; padding: 2px 8px; border-radius: 4px;
font-size: 11px; font-weight: 600; color: white;
}
.badge-ok { background: #4CAF50; }
.badge-err { background: #F44336; }
.badge-skip { background: #FF9800; }
/* Summary bar */
.summary-row {
display: flex; justify-content: space-between; align-items: center;
padding: 10px 0; border-bottom: 1px solid #f0f0f0;
}
.summary-row:last-child { border-bottom: none; }
.summary-label { font-size: 13px; color: #666; }
.summary-value { font-size: 18px; font-weight: 700; }
/* Source bar chart */
.bar-row { display: flex; align-items: center; gap: 8px; margin: 4px 0; }
.bar-label { width: 90px; font-size: 12px; text-align: right; color: #666; }
.bar-track { flex: 1; height: 20px; background: #f0f0f0; border-radius: 4px; overflow: hidden; position: relative; }
.bar-fill { height: 100%; border-radius: 4px; transition: width 0.5s ease; }
.bar-count { font-size: 12px; width: 36px; font-variant-numeric: tabular-nums; }
/* Loader */
.loader-wrap {
display: flex; flex-direction: column; align-items: center;
justify-content: center; padding: 60px 0;
}
.spinner {
width: 40px; height: 40px; border: 4px solid #e0e0e0;
border-top-color: #1976D2; border-radius: 50%;
animation: spin 0.8s linear infinite;
}
@keyframes spin { to { transform: rotate(360deg); } }
.loader-text { margin-top: 16px; color: #999; font-size: 14px; }
.error-msg { color: #F44336; padding: 40px 0; text-align: center; }
.link-row { text-align: center; margin-top: 8px; }
.link-row a { color: #1976D2; text-decoration: none; font-size: 14px; }
</style>
</head>
<body>
<h1>Scraper status</h1>
<div class="subtitle">maru-hleda-byt</div>
<div id="content">
<div class="loader-wrap">
<div class="spinner"></div>
<div class="loader-text">Nacitam status...</div>
</div>
</div>
<div class="link-row"><a href="mapa_bytu.html">Otevrit mapu</a></div>
<script>
var COLORS = {
sreality: '#1976D2',
realingo: '#7B1FA2',
bezrealitky: '#E65100',
idnes: '#C62828',
psn: '#2E7D32',
cityhome: '#00838F',
};
function timeAgo(dateStr) {
var d = new Date(dateStr);
var now = new Date();
var diff = Math.floor((now - d) / 1000);
if (diff < 60) return 'prave ted';
if (diff < 3600) return Math.floor(diff / 60) + ' min zpet';
if (diff < 86400) return Math.floor(diff / 3600) + ' hod zpet';
return Math.floor(diff / 86400) + ' dni zpet';
}
function formatDate(dateStr) {
var d = new Date(dateStr);
var day = d.getDate();
var months = ['ledna','unora','brezna','dubna','kvetna','cervna',
'cervence','srpna','zari','rijna','listopadu','prosince'];
var hh = String(d.getHours()).padStart(2, '0');
var mm = String(d.getMinutes()).padStart(2, '0');
return day + '. ' + months[d.getMonth()] + ' ' + d.getFullYear() + ', ' + hh + ':' + mm;
}
function render(data) {
// Check if scrape is currently running
if (data.status === 'running') {
document.getElementById('content').innerHTML =
'<div class="loader-wrap">' +
'<div class="spinner"></div>' +
'<div class="loader-text">Scraper prave bezi...</div>' +
'</div>';
setTimeout(loadStatus, 30000);
return;
}
var sources = data.sources || [];
var totalOk = 0, totalRej = 0;
var maxCount = 0;
sources.forEach(function(s) {
totalOk += s.accepted || 0;
totalRej += s.rejected || 0;
if (s.accepted > maxCount) maxCount = s.accepted;
});
var html = '';
// Timestamp card
html += '<div class="card">';
html += '<h2>Posledni scrape</h2>';
html += '<div class="timestamp">' + formatDate(data.timestamp) + '</div>';
html += '<div class="timestamp-ago">' + timeAgo(data.timestamp) + '</div>';
if (data.duration_sec) {
html += '<div class="timestamp-ago">Trvani: ' + Math.round(data.duration_sec) + 's</div>';
}
html += '</div>';
// Summary card
html += '<div class="card">';
html += '<h2>Souhrn</h2>';
html += '<div class="summary-row"><span class="summary-label">Vyhovujicich bytu</span><span class="summary-value" style="color:#4CAF50">' + totalOk + '</span></div>';
html += '<div class="summary-row"><span class="summary-label">Vyloucenych</span><span class="summary-value" style="color:#999">' + totalRej + '</span></div>';
if (data.deduplicated !== undefined) {
html += '<div class="summary-row"><span class="summary-label">Po deduplikaci (v mape)</span><span class="summary-value" style="color:#1976D2">' + data.deduplicated + '</span></div>';
}
html += '</div>';
// Sources card
html += '<div class="card">';
html += '<h2>Zdroje</h2>';
sources.forEach(function(s) {
var color = COLORS[s.name.toLowerCase()] || '#999';
var pct = maxCount > 0 ? Math.round((s.accepted / maxCount) * 100) : 0;
var badge = s.error
? '<span class="badge badge-err">chyba</span>'
: (s.accepted === 0 ? '<span class="badge badge-skip">0</span>' : '<span class="badge badge-ok">OK</span>');
html += '<div style="margin-bottom:12px;">';
html += '<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">';
html += '<span style="font-weight:600;font-size:14px;">' + s.name + ' ' + badge + '</span>';
html += '<span style="font-size:12px;color:#999;">' + (s.rejected || 0) + ' vyloucenych</span>';
html += '</div>';
html += '<div class="bar-row">';
html += '<div class="bar-track"><div class="bar-fill" style="width:' + pct + '%;background:' + color + ';"></div></div>';
html += '<span class="bar-count">' + (s.accepted || 0) + '</span>';
html += '</div>';
html += '</div>';
});
html += '</div>';
document.getElementById('content').innerHTML = html;
}
function loadStatus() {
fetch('status.json?t=' + Date.now())
.then(function(r) {
if (!r.ok) throw new Error(r.status);
return r.json();
})
.then(render)
.catch(function(err) {
document.getElementById('content').innerHTML =
'<div class="error-msg">Status zatim neni k dispozici.<br><small>(' + err.message + ')</small></div>';
});
}
loadStatus();
</script>
</body>
</html>