Add status dashboard, server, scraper stats, and DATA_DIR support

Key changes:
- Replace ratings_server.py + status.html with a unified server.py that serves the map, scraper status dashboard, and ratings API in one process
- Add scraper_stats.py utility: each scraper writes per-run stats (fetched, accepted, excluded, duration) to stats_<source>.json for the status page
- generate_status.py: respect DATA_DIR env var so status.json lands in the configured data directory instead of always the project root
- run_all.sh: replace the {"status":"running"} overwrite of status.json with a dedicated scraper_running.json lock file; trap on EXIT ensures cleanup even on kill/error, preventing the previous run's results from being wiped
- server.py: detect running state via scraper_running.json existence instead of status["status"] field, eliminating the dual-use race condition
- Makefile: add serve (local dev), debug (Docker debug container) targets; add SERVER_PORT variable
- build/Dockerfile + entrypoint.sh: switch to server.py, set DATA_DIR, adjust volume mounts
- .gitignore: add *.json and *.log to keep runtime data files out of VCS
- mapa_bytu.html: price-per-m² colouring, status link, UX tweaks

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 00:30:25 +01:00
parent 6f49533c94
commit 5fb3b984b6
17 changed files with 929 additions and 1122 deletions
--- a/scrape_psn.py
+++ b/scrape_psn.py
@@ -15,6 +15,9 @@ import time
 from datetime import datetime
 from pathlib import Path
 from urllib.parse import urlencode
+from scraper_stats import write_stats
+
+STATS_FILE = "stats_psn.json"

 logger = logging.getLogger(__name__)

@@ -67,6 +70,8 @@ def format_price(price: int) -> str:


 def scrape(max_properties: int | None = None):
+    _run_start = time.time()
+    _run_ts = datetime.now().isoformat(timespec="seconds")
    logger.info("=" * 60)
    logger.info("Stahuji inzeráty z PSN.cz")
    logger.info(f"Cena: do {format_price(MAX_PRICE)}")
@@ -93,6 +98,15 @@ def scrape(max_properties: int | None = None):
        data = fetch_json(url)
    except Exception as e:
        logger.error(f"Chyba při stahování: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "PSN",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - _run_start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
        return []

    all_units = data.get("units", {}).get("data", [])
@@ -241,6 +255,15 @@ def scrape(max_properties: int | None = None):
    logger.info(f"  ✓ Vyhovující byty:    {len(results)}")
    logger.info(f"{'=' * 60}")

+    write_stats(STATS_FILE, {
+        "source": "PSN",
+        "timestamp": _run_ts,
+        "duration_sec": round(time.time() - _run_start, 1),
+        "success": True,
+        "accepted": len(results),
+        "fetched": len(all_units),
+        "excluded": excluded,
+    })
    return results


@@ -259,8 +282,22 @@ if __name__ == "__main__":
        handlers=[logging.StreamHandler()]
    )

+    _run_ts = datetime.now().isoformat(timespec="seconds")
    start = time.time()
-    estates = scrape(max_properties=args.max_properties)
+    try:
+        estates = scrape(max_properties=args.max_properties)
+    except Exception as e:
+        logger.error(f"Scraper failed: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "PSN",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
+        raise

    if estates:
        json_path = Path("byty_psn.json")