Remove tracked generated/data files and fix map link on status page

- Remove byty_*.json, mapa_bytu.html, .DS_Store and settings.local.json from git tracking (already in .gitignore, files kept locally) - Fix "Otevřít mapu" link on scraper status page: / → /mapa_bytu.html Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Merge pull request 'Add scraper status collection and presentation' (#3) from add-scraper-statuses into main
2026-02-26 18:50:16 +01:00 · 2026-02-26 09:04:23 +00:00 · 2026-02-26 09:46:16 +01:00 · 2026-02-26 08:53:27 +01:00 · 2026-02-26 00:30:25 +01:00 · 2026-02-25 21:26:51 +00:00
28 changed files with 958 additions and 32747 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -1,31 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "WebFetch(domain:github.com)",
-      "WebFetch(domain:www.sreality.cz)",
-      "WebFetch(domain:webscraping.pro)",
-      "WebFetch(domain:raw.githubusercontent.com)",
-      "Bash(python3:*)",
-      "Bash(open:*)",
-      "WebFetch(domain:www.realingo.cz)",
-      "WebFetch(domain:api.realingo.cz)",
-      "Bash(curl:*)",
-      "Bash(grep:*)",
-      "WebFetch(domain:www.realitni-pes.cz)",
-      "WebFetch(domain:www.bezrealitky.cz)",
-      "WebFetch(domain:apify.com)",
-      "WebFetch(domain:www.bezrealitky.com)",
-      "WebFetch(domain:reality.idnes.cz)",
-      "Bash(# Final checks: robots.txt and response time for rate limiting clues curl -s -L -H \"\"User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/120.0.0.0 Safari/537.36\"\" \"\"https://reality.idnes.cz/robots.txt\"\")",
-      "WebFetch(domain:www.cityhome.cz)",
-      "WebFetch(domain:www.psn.cz)",
-      "WebFetch(domain:www.city-home.cz)",
-      "WebFetch(domain:psn.cz)",
-      "WebFetch(domain:api.psn.cz)",
-      "Bash(done)",
-      "Bash(# Final summary: count total units across all projects\n# Get the total count from the unitsCountData we already extracted\necho \"\"From unitsCountData on /prodej page:\"\"\necho \"\"  type_id 0 \\(Prodej bytů a ateliérů\\): 146\"\"\necho \"\"  type_id 1 \\(Prodej komerčních nemovitostí\\): 14\"\"\necho \"\"  type_id 2 \\(Pronájem bytů\\): 3\"\"\necho \"\"  type_id 3 \\(Pronájem komerčních nemovitostí\\): 48\"\"\necho \"\"\"\"\necho \"\"Total for-sale projects: 19\"\"\necho \"\"\"\"\necho \"\"Disposition counts from the data:\"\"\npython3 << 'PYEOF'\n# Extract disposition counts from prodej page\nimport re\n\nwith open\\('/tmp/psn_prodej_p1.html', 'r', encoding='utf-8'\\) as f:\n    html = f.read\\(\\)\n\n# Find disposition data\nidx = html.find\\('\\\\\\\\\"disposition\\\\\\\\\":['\\)\nif idx >= 0:\n    chunk = html[idx:idx+2000].replace\\('\\\\\\\\\"', '\"'\\)\n    # Extract name and count pairs\n    import re\n    pairs = re.findall\\(r'\"name\":\"\\([^\"]+\\)\",\"count\":\\(\\\\d+\\)', chunk\\)\n    for name, count in pairs:\n        print\\(f\"  {name}: {count}\"\\)\nPYEOF)",
-      "Bash(ls:*)",
-      "Bash(chmod:*)"
-    ]
-  }
-}
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
 .vscode/
 __pycache__/
+.DS_Store
 byty_*.json
+*.json
+*.log
+mapa_bytu.html
+
--- a/30
+++ b/30
@@ -3,9 +3,13 @@ CONTAINER_NAME    := maru-hleda-byt
 VOLUME_NAME       := maru-hleda-byt-data
 VALIDATION_CONTAINER := maru-hleda-byt-validation
 VALIDATION_VOLUME  := maru-hleda-byt-validation-data
+DEBUG_CONTAINER   := maru-hleda-byt-debug
+DEBUG_VOLUME      := maru-hleda-byt-debug-data
+DEBUG_PORT        ?= 8082
 PORT              := 8080
+SERVER_PORT       ?= 8080

-.PHONY: build run stop logs scrape restart clean help validation validation-local validation-stop validation-local-debug
+.PHONY: build run stop logs scrape restart clean help serve validation validation-local validation-stop validation-local-debug debug debug-stop

 help:
 	@echo "Available targets:"
@@ -20,6 +24,9 @@ help:
 	@echo "  validation-local-debug - Run validation locally with DEBUG logging"
 	@echo "  restart             - Restart the container (stop and run again)"
 	@echo "  clean               - Stop container and remove the Docker image"
+	@echo "  serve               - Start server.py locally on port 8080"
+	@echo "  debug               - Build and run debug Docker container with limited scrape (port $(DEBUG_PORT))"
+	@echo "  debug-stop          - Stop and remove the debug Docker container"
 	@echo "  help                - Show this help message"

 build:
@@ -59,6 +66,27 @@ validation-stop:
 	@docker rm $(VALIDATION_CONTAINER) 2>/dev/null || true
 	@echo "Validation container stopped and removed"

+debug: build
+	@docker stop $(DEBUG_CONTAINER) 2>/dev/null || true
+	@docker rm $(DEBUG_CONTAINER) 2>/dev/null || true
+	docker run -d --name $(DEBUG_CONTAINER) \
+		-p $(DEBUG_PORT):8080 \
+		-v $(DEBUG_VOLUME):/app/data \
+		-e LOG_LEVEL=DEBUG \
+		$(IMAGE_NAME)
+	@sleep 2
+	docker exec $(DEBUG_CONTAINER) bash /app/run_all.sh --max-pages 1 --max-properties 10
+	@echo "Debug app at http://localhost:$(DEBUG_PORT)/mapa_bytu.html"
+	@echo "Debug status at http://localhost:$(DEBUG_PORT)/scrapers-status"
+
+debug-stop:
+	@docker stop $(DEBUG_CONTAINER) 2>/dev/null || true
+	@docker rm $(DEBUG_CONTAINER) 2>/dev/null || true
+	@echo "Debug container stopped and removed"
+
+serve:
+	DATA_DIR=. SERVER_PORT=$(SERVER_PORT) python3 server.py
+
 validation-local:
 	./run_all.sh --max-pages 1 --max-properties 10

--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -5,12 +5,14 @@ RUN apk add --no-cache curl bash tzdata \
    && echo "Europe/Prague" > /etc/timezone

 ENV PYTHONUNBUFFERED=1
+ENV DATA_DIR=/app/data

 WORKDIR /app

 COPY scrape_and_map.py scrape_realingo.py scrape_bezrealitky.py \
     scrape_idnes.py scrape_psn.py scrape_cityhome.py \
-     merge_and_map.py regen_map.py run_all.sh ratings_server.py ./
+     merge_and_map.py regen_map.py generate_status.py scraper_stats.py \
+     run_all.sh server.py ./

 COPY build/crontab /etc/crontabs/root
 COPY build/entrypoint.sh /entrypoint.sh
@@ -18,7 +20,7 @@ RUN chmod +x /entrypoint.sh run_all.sh

 RUN mkdir -p /app/data

-EXPOSE 8080 8081
+EXPOSE 8080

 HEALTHCHECK --interval=60s --timeout=5s --start-period=300s \
    CMD wget -q -O /dev/null http://localhost:8080/ || exit 1
--- a/build/crontab
+++ b/build/crontab
@@ -1 +1 @@
-0 6,18 * * * cd /app && bash /app/run_all.sh >> /proc/1/fd/1 2>> /proc/1/fd/2
+0 */4 * * * cd /app && bash /app/run_all.sh >> /proc/1/fd/1 2>> /proc/1/fd/2
--- a/build/entrypoint.sh
+++ b/build/entrypoint.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -euo pipefail

-DATA_DIR="/app/data"
+export DATA_DIR="/app/data"

 # Create symlinks so scripts (which write to /app/) persist data to the volume
 for f in byty_sreality.json byty_realingo.json byty_bezrealitky.json \
@@ -18,8 +18,5 @@ crond -b -l 2
 echo "[entrypoint] Starting initial scrape in background..."
 bash /app/run_all.sh &

-echo "[entrypoint] Starting ratings API server on port 8081..."
-DATA_DIR="$DATA_DIR" python3 /app/ratings_server.py &
-
-echo "[entrypoint] Starting HTTP server on port 8080..."
-exec python3 -m http.server 8080 --directory "$DATA_DIR"
+echo "[entrypoint] Starting server on port 8080..."
+exec python3 /app/server.py
--- a/byty_bezrealitky.json
+++ b/byty_bezrealitky.json
@@ -1,427 +0,0 @@
-[
-  {
-    "hash_id": 990183,
-    "name": "Prodej bytu 3+kk 86 m²",
-    "price": 10385000,
-    "price_formatted": "10 385 000 Kč",
-    "locality": "Ke Tvrzi, Praha - Královice",
-    "lat": 50.0390519,
-    "lon": 14.63862,
-    "disposition": "3+kk",
-    "floor": 2,
-    "area": 86,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/990183-nabidka-prodej-bytu-ke-tvrzi-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 989862,
-    "name": "Prodej bytu 3+kk 73 m²",
-    "price": 12790000,
-    "price_formatted": "12 790 000 Kč",
-    "locality": "Vrázova, Praha - Smíchov",
-    "lat": 50.0711312,
-    "lon": 14.4076652,
-    "disposition": "3+kk",
-    "floor": 3,
-    "area": 73,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/989862-nabidka-prodej-bytu-vrazova-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 981278,
-    "name": "Prodej bytu 3+kk 70 m²",
-    "price": 11890000,
-    "price_formatted": "11 890 000 Kč",
-    "locality": "Argentinská, Praha - Holešovice",
-    "lat": 50.1026043,
-    "lon": 14.4435365,
-    "disposition": "3+kk",
-    "floor": 3,
-    "area": 70,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/981278-nabidka-prodej-bytu-argentinska-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 989817,
-    "name": "Prodej bytu 3+kk 88 m²",
-    "price": 13490000,
-    "price_formatted": "13 490 000 Kč",
-    "locality": "Miroslava Hajna, Praha - Letňany",
-    "lat": 50.1406487,
-    "lon": 14.5207541,
-    "disposition": "3+kk",
-    "floor": 2,
-    "area": 88,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/989817-nabidka-prodej-bytu-miroslava-hajna-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 970257,
-    "name": "Prodej bytu 3+1 106 m²",
-    "price": 12950000,
-    "price_formatted": "12 950 000 Kč",
-    "locality": "Novákových, Praha - Libeň",
-    "lat": 50.1034771,
-    "lon": 14.4758735,
-    "disposition": "3+1",
-    "floor": 5,
-    "area": 106,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/970257-nabidka-prodej-bytu-novakovych-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 972406,
-    "name": "Prodej bytu 3+kk 83 m²",
-    "price": 10490000,
-    "price_formatted": "10 490 000 Kč",
-    "locality": "Na Výrovně, Praha - Stodůlky",
-    "lat": 50.0396067,
-    "lon": 14.3167022,
-    "disposition": "3+kk",
-    "floor": 2,
-    "area": 83,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/972406-nabidka-prodej-bytu-na-vyrovne",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 967142,
-    "name": "Prodej bytu 3+kk 78 m²",
-    "price": 11648000,
-    "price_formatted": "11 648 000 Kč",
-    "locality": "Na Míčánkách, Praha - Vršovice",
-    "lat": 50.0713284,
-    "lon": 14.4638722,
-    "disposition": "3+kk",
-    "floor": 6,
-    "area": 78,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/967142-nabidka-prodej-bytu-na-micankach",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 955977,
-    "name": "Prodej bytu 4+kk 75 m²",
-    "price": 10363000,
-    "price_formatted": "10 363 000 Kč",
-    "locality": "Karla Guta, Praha - Uhříněves",
-    "lat": 50.03017,
-    "lon": 14.5940072,
-    "disposition": "4+kk",
-    "floor": 4,
-    "area": 75,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/955977-nabidka-prodej-bytu-karla-guta",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 974557,
-    "name": "Prodej bytu 4+kk 94 m²",
-    "price": 13499900,
-    "price_formatted": "13 499 900 Kč",
-    "locality": "V Dolině, Praha - Michle",
-    "lat": 50.0579963,
-    "lon": 14.4682887,
-    "disposition": "4+kk",
-    "floor": 8,
-    "area": 94,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/974557-nabidka-prodej-bytu-v-doline-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 988498,
-    "name": "Prodej bytu 3+1 75 m²",
-    "price": 11400000,
-    "price_formatted": "11 400 000 Kč",
-    "locality": "5. května, Praha - Nusle",
-    "lat": 50.0604096,
-    "lon": 14.4326302,
-    "disposition": "3+1",
-    "floor": 4,
-    "area": 75,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/988498-nabidka-prodej-bytu-5-kvetna-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 985285,
-    "name": "Prodej bytu 3+kk 70 m²",
-    "price": 12200000,
-    "price_formatted": "12 200 000 Kč",
-    "locality": "Klausova, Praha - Stodůlky",
-    "lat": 50.0370204,
-    "lon": 14.3432643,
-    "disposition": "3+kk",
-    "floor": 5,
-    "area": 70,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/985285-nabidka-prodej-bytu-klausova-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 965526,
-    "name": "Prodej bytu 3+kk 77 m²",
-    "price": 11890000,
-    "price_formatted": "11 890 000 Kč",
-    "locality": "Vinohradská, Praha - Strašnice",
-    "lat": 50.0776726,
-    "lon": 14.4870072,
-    "disposition": "3+kk",
-    "floor": 16,
-    "area": 77,
-    "building_type": "Smíšená",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/965526-nabidka-prodej-bytu-vinohradska-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 924811,
-    "name": "Prodej bytu 3+kk 75 m²",
-    "price": 13390000,
-    "price_formatted": "13 390 000 Kč",
-    "locality": "Waltariho, Praha - Hloubětín",
-    "lat": 50.1076717,
-    "lon": 14.5248559,
-    "disposition": "3+kk",
-    "floor": 4,
-    "area": 75,
-    "building_type": "Smíšená",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/924811-nabidka-prodej-bytu-waltariho-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 985859,
-    "name": "Prodej bytu 3+1 80 m²",
-    "price": 9000000,
-    "price_formatted": "9 000 000 Kč",
-    "locality": "Staňkova, Praha - Háje",
-    "lat": 50.0377128,
-    "lon": 14.5311557,
-    "disposition": "3+1",
-    "floor": 2,
-    "area": 80,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/985859-nabidka-prodej-bytu-stankova-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 985583,
-    "name": "Prodej bytu 3+kk 76 m²",
-    "price": 10850000,
-    "price_formatted": "10 850 000 Kč",
-    "locality": "Boloňská, Praha - Horní Měcholupy",
-    "lat": 50.047328,
-    "lon": 14.5565277,
-    "disposition": "3+kk",
-    "floor": 4,
-    "area": 76,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/985583-nabidka-prodej-bytu-bolonska-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 981178,
-    "name": "Prodej bytu 4+kk 86 m²",
-    "price": 11990000,
-    "price_formatted": "11 990 000 Kč",
-    "locality": "Sušilova, Praha - Uhříněves",
-    "lat": 50.032081,
-    "lon": 14.5885148,
-    "disposition": "4+kk",
-    "floor": 2,
-    "area": 86,
-    "building_type": "SKELET",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/981178-nabidka-prodej-bytu-susilova-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 973216,
-    "name": "Prodej bytu 4+1 82 m²",
-    "price": 11357000,
-    "price_formatted": "11 357 000 Kč",
-    "locality": "Nad Kapličkou, Praha - Strašnice",
-    "lat": 50.0839509,
-    "lon": 14.4904493,
-    "disposition": "4+1",
-    "floor": 2,
-    "area": 82,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/973216-nabidka-prodej-bytu-nad-kaplickou-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 868801,
-    "name": "Prodej bytu 3+kk 109 m²",
-    "price": 7299000,
-    "price_formatted": "7 299 000 Kč",
-    "locality": "Pod Karlovem, Praha - Vinohrady",
-    "lat": 50.0676313,
-    "lon": 14.432498,
-    "disposition": "3+kk",
-    "floor": 5,
-    "area": 109,
-    "building_type": "Cihlová",
-    "ownership": "Družstevní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/868801-nabidka-prodej-bytu-pod-karlovem-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 868795,
-    "name": "Prodej bytu 3+kk 106 m²",
-    "price": 6299000,
-    "price_formatted": "6 299 000 Kč",
-    "locality": "Pod Karlovem, Praha - Vinohrady",
-    "lat": 50.0676313,
-    "lon": 14.432498,
-    "disposition": "3+kk",
-    "floor": 2,
-    "area": 106,
-    "building_type": "Cihlová",
-    "ownership": "Družstevní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/868795-nabidka-prodej-bytu-pod-karlovem-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 981890,
-    "name": "Prodej bytu 3+1 84 m²",
-    "price": 12980000,
-    "price_formatted": "12 980 000 Kč",
-    "locality": "Novákových, Praha - Libeň",
-    "lat": 50.103273,
-    "lon": 14.4746894,
-    "disposition": "3+1",
-    "floor": 2,
-    "area": 84,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/981890-nabidka-prodej-bytu-novakovych-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 976276,
-    "name": "Prodej bytu 3+kk 75 m²",
-    "price": 13490000,
-    "price_formatted": "13 490 000 Kč",
-    "locality": "Svornosti, Praha - Smíchov",
-    "lat": 50.0673284,
-    "lon": 14.4095087,
-    "disposition": "3+kk",
-    "floor": 2,
-    "area": 75,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/976276-nabidka-prodej-bytu-svornosti-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 950787,
-    "name": "Prodej bytu 3+kk 70 m²",
-    "price": 9999000,
-    "price_formatted": "9 999 000 Kč",
-    "locality": "Sečská, Praha - Strašnice",
-    "lat": 50.071191,
-    "lon": 14.5035501,
-    "disposition": "3+kk",
-    "floor": 3,
-    "area": 70,
-    "building_type": "Smíšená",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/950787-nabidka-prodej-bytu-secska-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 978045,
-    "name": "Prodej bytu 3+kk 76 m²",
-    "price": 11133000,
-    "price_formatted": "11 133 000 Kč",
-    "locality": "K Vinoři, Praha - Kbely",
-    "lat": 50.1329656,
-    "lon": 14.5618499,
-    "disposition": "3+kk",
-    "floor": 2,
-    "area": 76,
-    "building_type": "Smíšená",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/978045-nabidka-prodej-bytu-k-vinori",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 974552,
-    "name": "Prodej bytu 3+1 75 m²",
-    "price": 11000000,
-    "price_formatted": "11 000 000 Kč",
-    "locality": "Vejražkova, Praha - Košíře",
-    "lat": 50.0637808,
-    "lon": 14.3612275,
-    "disposition": "3+1",
-    "floor": 2,
-    "area": 75,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/974552-nabidka-prodej-bytu-vejrazkova-praha",
-    "source": "bezrealitky",
-    "image": ""
-  },
-  {
-    "hash_id": 955010,
-    "name": "Prodej bytu 3+kk 70 m²",
-    "price": 12290000,
-    "price_formatted": "12 290 000 Kč",
-    "locality": "Břeclavská, Praha - Kyje",
-    "lat": 50.0951045,
-    "lon": 14.5454237,
-    "disposition": "3+kk",
-    "floor": 2,
-    "area": 70,
-    "building_type": "Cihlová",
-    "ownership": "Osobní",
-    "url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/955010-nabidka-prodej-bytu-breclavska-hlavni-mesto-praha",
-    "source": "bezrealitky",
-    "image": ""
-  }
-]
--- a/byty_cityhome.json
+++ b/byty_cityhome.json
@@ -1 +0,0 @@
-[]
--- a/byty_idnes.json
+++ b/byty_idnes.json
--- a/byty_merged.json
+++ b/byty_merged.json
--- a/byty_psn.json
+++ b/byty_psn.json
@@ -1 +0,0 @@
-[]
--- a/byty_realingo.json
+++ b/byty_realingo.json
--- a/byty_sreality.json
+++ b/byty_sreality.json
--- a/docs/validation.md
+++ b/docs/validation.md
@@ -0,0 +1,123 @@
+# Validation Recipe
+
+End-to-end check that scraping, data persistence, history, and the status page all work correctly in Docker.
+
+## What it verifies
+
+- Scrapers run and write their output to `DATA_DIR` (`/app/data`); see Notes below for the known exceptions (PSN, Bezrealitky)
+- `stats_*.json` land in `/app/data/` (not in `/app/`)
+- `status.json` and `scraper_history.json` land in `/app/data/`
+- `/api/status`, `/api/status/history`, and `/scrapers-status` serve correct data
+- History accumulates across runs
+
+## Steps
+
+### 1. Build the image
+
+```bash
+make build
+```
+
+### 2. Start a clean validation container
+
+```bash
+# Stop/remove any leftover container and volume from a previous run
+docker stop maru-hleda-byt-validation 2>/dev/null; docker rm maru-hleda-byt-validation 2>/dev/null
+docker volume rm maru-hleda-byt-validation-data 2>/dev/null
+
+docker run -d --name maru-hleda-byt-validation \
+  -p 8081:8080 \
+  -v maru-hleda-byt-validation-data:/app/data \
+  maru-hleda-byt
+```
+
+Give the container ~3 seconds to start. The entrypoint launches a background full scrape automatically — suppress it so only controlled runs execute:
+
+```bash
+sleep 3
+docker exec maru-hleda-byt-validation pkill -f run_all.sh 2>/dev/null || true
+docker exec maru-hleda-byt-validation rm -f /app/data/scraper_running.json 2>/dev/null || true
+```
+
+### 3. Run a limited scrape (run 1)
+
+```bash
+docker exec maru-hleda-byt-validation bash /app/run_all.sh --max-pages 1 --max-properties 10
+```
+
+Expected output (last few lines):
+```
+Status uložen: /app/data/status.json
+Historie uložena: /app/data/scraper_history.json (1 záznamů)
+```
+
+### 4. Verify data files are in `/app/data/`
+
+```bash
+docker exec maru-hleda-byt-validation ls /app/data/
+```
+
+Expected files:
+```
+byty_cityhome.json   byty_idnes.json   byty_merged.json
+byty_realingo.json   byty_sreality.json
+mapa_bytu.html
+scraper_history.json
+stats_bezrealitky.json  stats_cityhome.json  stats_idnes.json
+stats_realingo.json     stats_sreality.json
+status.json
+```
+
+### 5. Run a second limited scrape (run 2)
+
+```bash
+docker exec maru-hleda-byt-validation bash /app/run_all.sh --max-pages 1 --max-properties 10
+```
+
+Expected last line: `Historie uložena: /app/data/scraper_history.json (2 záznamů)`
+
+### 6. Verify history via API
+
+```bash
+curl -s http://localhost:8081/api/status/history | python3 -c "
+import json, sys
+h = json.load(sys.stdin)
+print(f'{len(h)} entries:')
+for i, e in enumerate(h):
+    print(f'  [{i}] {e[\"timestamp\"]} total={e[\"total_accepted\"]}')
+"
+```
+
+Expected: 2 entries with different timestamps.
+
+```bash
+curl -s http://localhost:8081/api/status | python3 -c "
+import json, sys; s=json.load(sys.stdin)
+print(f'status={s[\"status\"]} total={s[\"total_accepted\"]} ts={s[\"timestamp\"]}')
+"
+```
+
+Expected: `status=done total=<N> ts=<latest timestamp>`
+
+### 7. Check the status page
+
+Open http://localhost:8081/scrapers-status in a browser (or `curl -s http://localhost:8081/scrapers-status | grep -c "clickable-row"` — should print `2`).
+
+### 8. Clean up
+
+```bash
+docker stop maru-hleda-byt-validation && docker rm maru-hleda-byt-validation
+docker volume rm maru-hleda-byt-validation-data
+```
+
+Or use the Makefile shortcut:
+
+```bash
+make validation-stop
+```
+
+## Notes
+
+- PSN scraper does not support `--max-pages` and will always fail with this command; `success=False` in history is expected during validation.
+- Bezrealitky may return 0 results with a 1-page limit; `byty_bezrealitky.json` will be absent from `/app/data/` in that case — this is normal.
+- `make validation` (the Makefile target) runs the same limited scrape but does not suppress the background startup scrape, so two concurrent runs may occur. Use the manual steps above for a clean controlled test.
--- a/generate_status.py
+++ b/generate_status.py
@@ -1,16 +1,15 @@
 #!/usr/bin/env python3
-"""Generate status.json from scraper JSON outputs and run log."""
+"""Generate status.json from scraper JSON outputs and per-scraper stats files."""
 from __future__ import annotations

+import argparse
 import json
 import os
-import re
-import sys
 from datetime import datetime
 from pathlib import Path
-from typing import Optional

 HERE = Path(__file__).parent
+DATA_DIR = Path(os.environ.get("DATA_DIR", HERE))

 SOURCE_FILES = {
    "Sreality":    "byty_sreality.json",
@@ -21,7 +20,17 @@ SOURCE_FILES = {
    "CityHome":    "byty_cityhome.json",
 }

+STATS_FILES = {
+    "Sreality":    "stats_sreality.json",
+    "Realingo":    "stats_realingo.json",
+    "Bezrealitky": "stats_bezrealitky.json",
+    "iDNES":       "stats_idnes.json",
+    "PSN":         "stats_psn.json",
+    "CityHome":    "stats_cityhome.json",
+}
+
 MERGED_FILE = "byty_merged.json"
+HISTORY_FILE = "scraper_history.json"


 def count_source(path: Path) -> dict:
@@ -36,105 +45,51 @@ def count_source(path: Path) -> dict:
        return {"accepted": 0, "error": str(e)}


-def parse_log(log_path: str) -> dict[str, dict]:
-    """Parse scraper run log and extract per-source statistics.
-
-    Scrapers log summary lines like:
-      ✓ Vyhovující byty:    12
-      Vyloučeno (prodáno):   5
-      Staženo stránek:       3
-      Staženo inzerátů:      48
-      Celkem bytů v cache:   120
-    and section headers like:
-      [2/6] Realingo
-    """
-    if not log_path or not os.path.exists(log_path):
+def read_scraper_stats(path: Path) -> dict:
+    """Load a per-scraper stats JSON. Returns {} on missing or corrupt file."""
+    if not path.exists():
+        return {}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+        return data if isinstance(data, dict) else {}
+    except Exception:
        return {}

-    with open(log_path, encoding="utf-8") as f:
-        content = f.read()

-    # Split into per-source sections by the [N/6] Step header
-    # Each section header looks like "[2/6] Realingo\n----..."
-    section_pattern = re.compile(r'\[(\d+)/\d+\]\s+(.+)\n-+', re.MULTILINE)
-    sections_found = list(section_pattern.finditer(content))
+def append_to_history(status: dict, keep: int) -> None:
+    """Append the current status entry to scraper_history.json, keeping only `keep` latest."""
+    history_path = DATA_DIR / HISTORY_FILE
+    history: list = []
+    if history_path.exists():
+        try:
+            history = json.loads(history_path.read_text(encoding="utf-8"))
+            if not isinstance(history, list):
+                history = []
+        except Exception:
+            history = []

-    if not sections_found:
-        return {}
+    history.append(status)

-    stats = {}
-    for i, match in enumerate(sections_found):
-        step_name = match.group(2).strip()
-        start = match.end()
-        end = sections_found[i + 1].start() if i + 1 < len(sections_found) else len(content)
-        section_text = content[start:end]
+    # Keep only the N most recent entries
+    if keep > 0 and len(history) > keep:
+        history = history[-keep:]

-        # Identify which sources this section covers
-        # "PSN + CityHome" covers both
-        source_names = []
-        for name in SOURCE_FILES:
-            if name.lower() in step_name.lower():
-                source_names.append(name)
-        if not source_names:
-            continue
-
-        # Parse numeric summary lines
-        def extract(pattern: str) -> Optional[int]:
-            m = re.search(pattern, section_text)
-            return int(m.group(1)) if m else None
-
-        # Lines present in all/most scrapers
-        accepted = extract(r'Vyhovující byty[:\s]+(\d+)')
-        fetched = extract(r'Staženo inzerátů[:\s]+(\d+)')
-        pages = extract(r'Staženo stránek[:\s]+(\d+)')
-        cached = extract(r'Celkem bytů v cache[:\s]+(\d+)')
-        cache_hits = extract(r'Cache hit[:\s]+(\d+)')
-
-        # Rejection reasons — collect all into a dict
-        excluded = {}
-        for m in re.finditer(r'Vyloučeno\s+\(([^)]+)\)[:\s]+(\d+)', section_text):
-            excluded[m.group(1)] = int(m.group(2))
-        # Also PSN-style "Vyloučeno (prodáno): N"
-        total_excluded = sum(excluded.values()) if excluded else extract(r'Vyloučen\w*[:\s]+(\d+)')
-
-        entry = {}
-        if accepted is not None:
-            entry["accepted"] = accepted
-        if fetched is not None:
-            entry["fetched"] = fetched
-        if pages is not None:
-            entry["pages"] = pages
-        if cached is not None:
-            entry["cached"] = cached
-        if cache_hits is not None:
-            entry["cache_hits"] = cache_hits
-        if excluded:
-            entry["excluded"] = excluded
-        elif total_excluded is not None:
-            entry["excluded_total"] = total_excluded
-
-        for name in source_names:
-            stats[name] = entry
-
-    return stats
+    history_path.write_text(json.dumps(history, ensure_ascii=False, indent=2), encoding="utf-8")
+    print(f"Historie uložena: {history_path} ({len(history)} záznamů)")


 def main():
-    start_time = None
-    duration_sec = None
+    parser = argparse.ArgumentParser(description="Generate status.json from scraper outputs.")
+    parser.add_argument("--start-time", dest="start_time", default=None,
+                        help="ISO timestamp of scrape start (default: now)")
+    parser.add_argument("--duration", dest="duration", type=int, default=None,
+                        help="Run duration in seconds")
+    parser.add_argument("--keep", dest="keep", type=int, default=20,
+                        help="Number of history entries to keep (default: 20, 0=unlimited)")
+    args = parser.parse_args()

-    if len(sys.argv) >= 3:
-        start_time = sys.argv[1]
-        try:
-            duration_sec = int(sys.argv[2])
-        except ValueError:
-            pass
-
-    if not start_time:
-        start_time = datetime.now().isoformat(timespec="seconds")
-
-    log_path = sys.argv[3] if len(sys.argv) >= 4 else None
-    log_stats = parse_log(log_path)
+    start_time = args.start_time or datetime.now().isoformat(timespec="seconds")
+    duration_sec = args.duration

    sources = []
    for name, filename in SOURCE_FILES.items():
@@ -142,14 +97,12 @@ def main():
        info = count_source(path)
        info["name"] = name

-        # Merge log stats
-        ls = log_stats.get(name, {})
-        for k in ("fetched", "pages", "cached", "cache_hits", "excluded", "excluded_total"):
-            if k in ls:
-                info[k] = ls[k]
-        # Override accepted from log if available (log is authoritative for latest run)
-        if "accepted" in ls:
-            info["accepted"] = ls["accepted"]
+        # Merge in stats from the per-scraper stats file (authoritative for run data)
+        stats = read_scraper_stats(DATA_DIR / STATS_FILES[name])
+        for key in ("accepted", "fetched", "pages", "cache_hits", "excluded", "excluded_total",
+                    "success", "duration_sec", "error"):
+            if key in stats:
+                info[key] = stats[key]

        sources.append(info)

@@ -168,17 +121,21 @@ def main():

    duplicates_removed = total_accepted - deduplicated if deduplicated else 0

+    # Top-level success: True if no source has an error
+    success = not any("error" in s for s in sources)
+
    status = {
        "status": "done",
        "timestamp": start_time,
        "duration_sec": duration_sec,
+        "success": success,
        "total_accepted": total_accepted,
        "deduplicated": deduplicated,
        "duplicates_removed": duplicates_removed,
        "sources": sources,
    }

-    out = HERE / "status.json"
+    out = DATA_DIR / "status.json"
    out.write_text(json.dumps(status, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"Status uložen: {out}")
    print(f"  Celkem bytů (před dedup): {total_accepted}")
@@ -197,6 +154,8 @@ def main():
            parts.append(f"[CHYBA: {err}]")
        print("  " + "  ".join(parts))

+    append_to_history(status, args.keep)
+

 if __name__ == "__main__":
    main()
--- a/mapa_bytu.html
+++ b/mapa_bytu.html
--- a/ratings_server.py
+++ b/ratings_server.py
@@ -1,116 +0,0 @@
-#!/usr/bin/env python3
-"""
-Minimal HTTP API server for persisting apartment ratings.
-
-GET  /api/ratings        → returns ratings.json contents
-POST /api/ratings        → saves entire ratings object
-GET  /api/ratings/export → same as GET, but with download header
-
-Ratings file: /app/data/ratings.json (or ./ratings.json locally)
-"""
-
-import json
-import logging
-import os
-import sys
-from http.server import BaseHTTPRequestHandler, HTTPServer
-from pathlib import Path
-
-PORT = int(os.environ.get("RATINGS_PORT", 8081))
-DATA_DIR = Path(os.environ.get("DATA_DIR", "."))
-RATINGS_FILE = DATA_DIR / "ratings.json"
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [ratings] %(levelname)s %(message)s",
-    datefmt="%Y-%m-%dT%H:%M:%S",
-)
-log = logging.getLogger(__name__)
-
-
-def load_ratings() -> dict:
-    try:
-        if RATINGS_FILE.exists():
-            return json.loads(RATINGS_FILE.read_text(encoding="utf-8"))
-    except Exception as e:
-        log.error("Failed to load ratings: %s", e)
-    return {}
-
-
-def save_ratings(data: dict) -> None:
-    RATINGS_FILE.write_text(
-        json.dumps(data, ensure_ascii=False, indent=2),
-        encoding="utf-8",
-    )
-
-
-class RatingsHandler(BaseHTTPRequestHandler):
-    def log_message(self, format, *args):
-        # Suppress default HTTP access log (we use our own)
-        pass
-
-    def _send_json(self, status: int, body: dict, extra_headers=None):
-        payload = json.dumps(body, ensure_ascii=False).encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", "application/json; charset=utf-8")
-        self.send_header("Content-Length", str(len(payload)))
-        self.send_header("Access-Control-Allow-Origin", "*")
-        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
-        self.send_header("Access-Control-Allow-Headers", "Content-Type")
-        if extra_headers:
-            for k, v in extra_headers.items():
-                self.send_header(k, v)
-        self.end_headers()
-        self.wfile.write(payload)
-
-    def do_OPTIONS(self):
-        # CORS preflight
-        self.send_response(204)
-        self.send_header("Access-Control-Allow-Origin", "*")
-        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
-        self.send_header("Access-Control-Allow-Headers", "Content-Type")
-        self.end_headers()
-
-    def do_GET(self):
-        if self.path in ("/api/ratings", "/api/ratings/export"):
-            ratings = load_ratings()
-            extra = None
-            if self.path == "/api/ratings/export":
-                extra = {"Content-Disposition": 'attachment; filename="ratings.json"'}
-            log.info("GET %s → %d ratings", self.path, len(ratings))
-            self._send_json(200, ratings, extra)
-        else:
-            self._send_json(404, {"error": "not found"})
-
-    def do_POST(self):
-        if self.path == "/api/ratings":
-            length = int(self.headers.get("Content-Length", 0))
-            if length == 0:
-                self._send_json(400, {"error": "empty body"})
-                return
-            try:
-                raw = self.rfile.read(length)
-                data = json.loads(raw.decode("utf-8"))
-            except Exception as e:
-                log.warning("Bad request body: %s", e)
-                self._send_json(400, {"error": "invalid JSON"})
-                return
-            if not isinstance(data, dict):
-                self._send_json(400, {"error": "expected JSON object"})
-                return
-            save_ratings(data)
-            log.info("POST /api/ratings → saved %d ratings", len(data))
-            self._send_json(200, {"ok": True, "count": len(data)})
-        else:
-            self._send_json(404, {"error": "not found"})
-
-
-if __name__ == "__main__":
-    log.info("Ratings server starting on port %d, data dir: %s", PORT, DATA_DIR)
-    log.info("Ratings file: %s", RATINGS_FILE)
-    server = HTTPServer(("0.0.0.0", PORT), RatingsHandler)
-    try:
-        server.serve_forever()
-    except KeyboardInterrupt:
-        log.info("Stopped.")
-        sys.exit(0)
--- a/run_all.sh
+++ b/run_all.sh
@@ -20,8 +20,10 @@ START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
 START_EPOCH=$(date +%s)
 LOG_FILE="$(pwd)/scrape_run.log"

-# Mark status as running
-echo '{"status":"running"}' > status.json
+# Mark the scraper as running by writing a small JSON marker file into
+# DATA_DIR (or the current directory when DATA_DIR is unset or empty).
+# The EXIT trap deletes the marker when the script exits, including exits
+# caused by errors. An uncatchable SIGKILL bypasses traps, so a stale marker
+# can still be left behind in that case.
+LOCK_FILE="${DATA_DIR:-.}/scraper_running.json"
+echo '{"running":true,"started_at":"'"$START_TIME"'"}' > "$LOCK_FILE"
+trap 'rm -f "$LOCK_FILE"' EXIT

 show_help() {
    echo "Usage: ./run_all.sh [OPTIONS]"
@@ -32,16 +34,19 @@ show_help() {
    echo "  --max-pages N         Maximální počet stránek ke stažení z každého zdroje"
    echo "  --max-properties N    Maximální počet nemovitostí ke stažení z každého zdroje"
    echo "  --log-level LEVEL     Úroveň logování (DEBUG, INFO, WARNING, ERROR)"
+    echo "  --keep N              Počet běhů v historii (výchozí: 5, 0=neomezeno)"
    echo "  -h, --help            Zobrazí tuto nápovědu"
    echo ""
    echo "Examples:"
    echo "  ./run_all.sh                                  # plný běh"
    echo "  ./run_all.sh --max-pages 1 --max-properties 10  # rychlý test"
    echo "  ./run_all.sh --log-level DEBUG                # s debug logováním"
+    echo "  ./run_all.sh --keep 10                        # uchovej 10 běhů v historii"
 }

 # Parse arguments
 SCRAPER_ARGS=""
+KEEP_ARG=""
 while [[ $# -gt 0 ]]; do
    case $1 in
        -h|--help)
@@ -52,6 +57,10 @@ while [[ $# -gt 0 ]]; do
            SCRAPER_ARGS="$SCRAPER_ARGS $1 $2"
            shift 2
            ;;
+        --keep)
+            # Validate up front: without a value `shift 2` cannot shift, and a
+            # non-numeric value would otherwise only reach generate_status.py
+            # after the whole scrape has already run.
+            if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
+                echo "Error: --keep requires a non-negative integer" >&2
+                exit 1
+            fi
+            KEEP_ARG="--keep $2"
+            shift 2
+            ;;
        *)
            echo "Unknown argument: $1"
            echo ""
@@ -103,7 +112,7 @@ python3 merge_and_map.py || { echo -e "${RED}✗ Merge selhal${NC}"; FAILED=$((F

 END_EPOCH=$(date +%s)
 DURATION=$((END_EPOCH - START_EPOCH))
-python3 generate_status.py "$START_TIME" "$DURATION" "$LOG_FILE"
+python3 generate_status.py --start-time "$START_TIME" --duration "$DURATION" $KEEP_ARG

 echo ""
 echo "============================================================"
--- a/scrape_and_map.py
+++ b/scrape_and_map.py
@@ -15,6 +15,9 @@ import urllib.request
 import urllib.parse
 from datetime import datetime
 from pathlib import Path
+from scraper_stats import write_stats
+
+STATS_FILE = "stats_sreality.json"

 logger = logging.getLogger(__name__)

@@ -209,6 +212,8 @@ def load_cache(json_path: str = "byty_sreality.json") -> dict[int, dict]:

 def scrape(max_pages: int | None = None, max_properties: int | None = None):
    """Main scraping function. Returns list of filtered estates."""
+    _run_start = time.time()
+    _run_ts = datetime.now().isoformat(timespec="seconds")
    all_estates_raw = []
    cache = load_cache()

@@ -366,6 +371,21 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
    logger.info(f"  ✓ Vyhovující byty:    {len(results)}")
    logger.info(f"{'=' * 60}")

+    write_stats(STATS_FILE, {
+        "source": "Sreality",
+        "timestamp": _run_ts,
+        "duration_sec": round(time.time() - _run_start, 1),
+        "success": True,
+        "accepted": len(results),
+        "fetched": len(unique_estates),
+        "cache_hits": cache_hits,
+        "excluded": {
+            "panel/síd": excluded_panel,
+            "<69 m²": excluded_small,
+            "bez GPS": excluded_no_gps,
+            "bez detailu": excluded_no_detail,
+        },
+    })
    return results


@@ -653,7 +673,7 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
      Skrýt zamítnuté
    </label>
  </div>
-  <div class="status-link"><a href="status.html">Scraper status</a></div>
+  <div class="status-link"><a href="/scrapers-status">Scraper status</a></div>
 </div>

 <script>
@@ -1089,8 +1109,22 @@ if __name__ == "__main__":
        handlers=[logging.StreamHandler()]
    )

+    _run_ts = datetime.now().isoformat(timespec="seconds")
    start = time.time()
-    estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    try:
+        estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    except Exception as e:
+        logger.error(f"Scraper failed: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "Sreality",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
+        raise

    if estates:
        # Save raw data as JSON backup
--- a/scrape_bezrealitky.py
+++ b/scrape_bezrealitky.py
@@ -15,6 +15,9 @@ import re
 import time
 import urllib.request
 from pathlib import Path
+# datetime is needed by the run-timestamp code this change adds to scrape()
+# and to the __main__ block; a repeated import is harmless if one exists above.
+from datetime import datetime
+from scraper_stats import write_stats
+
+# File name passed to write_stats() for this scraper's run statistics.
+STATS_FILE = "stats_bezrealitky.json"

 logger = logging.getLogger(__name__)

@@ -171,6 +174,8 @@ def load_cache(json_path: str = "byty_bezrealitky.json") -> dict[int, dict]:


 def scrape(max_pages: int | None = None, max_properties: int | None = None):
+    _run_start = time.time()
+    _run_ts = datetime.now().isoformat(timespec="seconds")
    cache = load_cache()

    logger.info("=" * 60)
@@ -374,6 +379,25 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
    logger.info(f"  ✓ Vyhovující byty:    {len(results)}")
    logger.info(f"{'=' * 60}")

+    write_stats(STATS_FILE, {
+        "source": "Bezrealitky",
+        "timestamp": _run_ts,
+        "duration_sec": round(time.time() - _run_start, 1),
+        "success": True,
+        "accepted": len(results),
+        "fetched": len(all_adverts),
+        "pages": page - 1,
+        "cache_hits": cache_hits,
+        "excluded": {
+            "dispozice": excluded_disp,
+            "cena": excluded_price,
+            "plocha": excluded_area,
+            "bez GPS": excluded_no_gps,
+            "panel/síd": excluded_panel,
+            "patro": excluded_floor,
+            "bez detailu": excluded_detail,
+        },
+    })
    return results


@@ -394,8 +418,22 @@ if __name__ == "__main__":
        handlers=[logging.StreamHandler()]
    )

+    _run_ts = datetime.now().isoformat(timespec="seconds")
    start = time.time()
-    estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    try:
+        estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    except Exception as e:
+        logger.error(f"Scraper failed: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "Bezrealitky",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
+        raise

    if estates:
        json_path = Path("byty_bezrealitky.json")
--- a/scrape_cityhome.py
+++ b/scrape_cityhome.py
@@ -14,6 +14,9 @@ import time
 import urllib.request
 from datetime import datetime
 from pathlib import Path
+from scraper_stats import write_stats
+
+STATS_FILE = "stats_cityhome.json"

 logger = logging.getLogger(__name__)

@@ -203,6 +206,8 @@ def extract_project_gps(html: str) -> tuple[float, float] | None:


 def scrape(max_pages: int | None = None, max_properties: int | None = None):
+    _run_start = time.time()
+    _run_ts = datetime.now().isoformat(timespec="seconds")
    logger.info("=" * 60)
    logger.info("Stahuji inzeráty z CityHome (city-home.cz)")
    logger.info(f"Cena: do {format_price(MAX_PRICE)}")
@@ -374,6 +379,23 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
    logger.info(f"  ✓ Vyhovující byty:    {len(results)}")
    logger.info(f"{'=' * 60}")

+    write_stats(STATS_FILE, {
+        "source": "CityHome",
+        "timestamp": _run_ts,
+        "duration_sec": round(time.time() - _run_start, 1),
+        "success": True,
+        "accepted": len(results),
+        "fetched": len(all_listings),
+        "excluded": {
+            "prodáno": excluded_sold,
+            "typ": excluded_type,
+            "dispozice": excluded_disp,
+            "cena": excluded_price,
+            "plocha": excluded_area,
+            "patro": excluded_floor,
+            "bez GPS": excluded_no_gps,
+        },
+    })
    return results


@@ -394,8 +416,22 @@ if __name__ == "__main__":
        handlers=[logging.StreamHandler()]
    )

+    _run_ts = datetime.now().isoformat(timespec="seconds")
    start = time.time()
-    estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    try:
+        estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    except Exception as e:
+        logger.error(f"Scraper failed: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "CityHome",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
+        raise

    if estates:
        json_path = Path("byty_cityhome.json")
--- a/scrape_idnes.py
+++ b/scrape_idnes.py
@@ -17,6 +17,9 @@ import urllib.request
 import urllib.parse
 from html.parser import HTMLParser
 from pathlib import Path
+# datetime is needed by the run-timestamp code this change adds to scrape()
+# and to the __main__ block; a repeated import is harmless if one exists above.
+from datetime import datetime
+from scraper_stats import write_stats
+
+# File name passed to write_stats() for this scraper's run statistics.
+STATS_FILE = "stats_idnes.json"

 logger = logging.getLogger(__name__)

@@ -279,6 +282,8 @@ def load_cache(json_path: str = "byty_idnes.json") -> dict[str, dict]:


 def scrape(max_pages: int | None = None, max_properties: int | None = None):
+    _run_start = time.time()
+    _run_ts = datetime.now().isoformat(timespec="seconds")
    cache = load_cache()

    logger.info("=" * 60)
@@ -478,6 +483,25 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
    logger.info(f"  ✓ Vyhovující byty:    {len(results)}")
    logger.info(f"{'=' * 60}")

+    write_stats(STATS_FILE, {
+        "source": "iDNES",
+        "timestamp": _run_ts,
+        "duration_sec": round(time.time() - _run_start, 1),
+        "success": True,
+        "accepted": len(results),
+        "fetched": len(all_listings),
+        "pages": page,
+        "cache_hits": cache_hits,
+        "excluded": {
+            "cena": excluded_price,
+            "plocha": excluded_area,
+            "dispozice": excluded_disp,
+            "panel/síd": excluded_panel,
+            "patro": excluded_floor,
+            "bez GPS": excluded_no_gps,
+            "bez detailu": excluded_detail,
+        },
+    })
    return results


@@ -498,8 +522,22 @@ if __name__ == "__main__":
        handlers=[logging.StreamHandler()]
    )

+    _run_ts = datetime.now().isoformat(timespec="seconds")
    start = time.time()
-    estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    try:
+        estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    except Exception as e:
+        logger.error(f"Scraper failed: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "iDNES",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
+        raise

    if estates:
        json_path = Path("byty_idnes.json")
--- a/scrape_psn.py
+++ b/scrape_psn.py
@@ -15,6 +15,9 @@ import time
 from datetime import datetime
 from pathlib import Path
 from urllib.parse import urlencode
+from scraper_stats import write_stats
+
+STATS_FILE = "stats_psn.json"

 logger = logging.getLogger(__name__)

@@ -67,6 +70,8 @@ def format_price(price: int) -> str:


 def scrape(max_properties: int | None = None):
+    _run_start = time.time()
+    _run_ts = datetime.now().isoformat(timespec="seconds")
    logger.info("=" * 60)
    logger.info("Stahuji inzeráty z PSN.cz")
    logger.info(f"Cena: do {format_price(MAX_PRICE)}")
@@ -93,6 +98,15 @@ def scrape(max_properties: int | None = None):
        data = fetch_json(url)
    except Exception as e:
        logger.error(f"Chyba při stahování: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "PSN",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - _run_start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
        return []

    all_units = data.get("units", {}).get("data", [])
@@ -241,6 +255,15 @@ def scrape(max_properties: int | None = None):
    logger.info(f"  ✓ Vyhovující byty:    {len(results)}")
    logger.info(f"{'=' * 60}")

+    write_stats(STATS_FILE, {
+        "source": "PSN",
+        "timestamp": _run_ts,
+        "duration_sec": round(time.time() - _run_start, 1),
+        "success": True,
+        "accepted": len(results),
+        "fetched": len(all_units),
+        "excluded": excluded,
+    })
    return results


@@ -259,8 +282,22 @@ if __name__ == "__main__":
        handlers=[logging.StreamHandler()]
    )

+    _run_ts = datetime.now().isoformat(timespec="seconds")
    start = time.time()
-    estates = scrape(max_properties=args.max_properties)
+    try:
+        estates = scrape(max_properties=args.max_properties)
+    except Exception as e:
+        logger.error(f"Scraper failed: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "PSN",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
+        raise

    if estates:
        json_path = Path("byty_psn.json")
--- a/scrape_realingo.py
+++ b/scrape_realingo.py
@@ -15,6 +15,9 @@ import re
 import time
 import urllib.request
 from pathlib import Path
+# datetime is needed by the run-timestamp code this change adds to scrape()
+# and to the __main__ block; a repeated import is harmless if one exists above.
+from datetime import datetime
+from scraper_stats import write_stats
+
+# File name passed to write_stats() for this scraper's run statistics.
+STATS_FILE = "stats_realingo.json"

 logger = logging.getLogger(__name__)

@@ -136,6 +139,8 @@ def load_cache(json_path: str = "byty_realingo.json") -> dict[int, dict]:


 def scrape(max_pages: int | None = None, max_properties: int | None = None):
+    _run_start = time.time()
+    _run_ts = datetime.now().isoformat(timespec="seconds")
    cache = load_cache()

    logger.info("=" * 60)
@@ -333,6 +338,25 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
    logger.info(f"  ✓ Vyhovující byty:    {len(results)}")
    logger.info(f"{'=' * 60}")

+    write_stats(STATS_FILE, {
+        "source": "Realingo",
+        "timestamp": _run_ts,
+        "duration_sec": round(time.time() - _run_start, 1),
+        "success": True,
+        "accepted": len(results),
+        "fetched": len(all_listings),
+        "pages": page - 1,
+        "cache_hits": cache_hits,
+        "excluded": {
+            "dispozice": excluded_category,
+            "cena": excluded_price,
+            "plocha": excluded_area,
+            "bez GPS": excluded_no_gps,
+            "panel/síd": excluded_panel,
+            "patro": excluded_floor,
+            "bez detailu": excluded_detail,
+        },
+    })
    return results


@@ -353,8 +377,22 @@ if __name__ == "__main__":
        handlers=[logging.StreamHandler()]
    )

+    _run_ts = datetime.now().isoformat(timespec="seconds")
    start = time.time()
-    estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    try:
+        estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+    except Exception as e:
+        logger.error(f"Scraper failed: {e}", exc_info=True)
+        write_stats(STATS_FILE, {
+            "source": "Realingo",
+            "timestamp": _run_ts,
+            "duration_sec": round(time.time() - start, 1),
+            "success": False,
+            "accepted": 0,
+            "fetched": 0,
+            "error": str(e),
+        })
+        raise

    if estates:
        json_path = Path("byty_realingo.json")
--- a/scraper_stats.py
+++ b/scraper_stats.py
@@ -0,0 +1,15 @@
+"""Shared utility for writing per-scraper run statistics to JSON."""
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+# Directory containing this module; used as the fallback data directory.
+HERE = Path(__file__).parent
+# Directory the stats files are written to: the DATA_DIR environment
+# variable when set, otherwise the module's own directory.
+DATA_DIR = Path(os.environ.get("DATA_DIR", HERE))
+
+
+def write_stats(filename: str, stats: dict) -> None:
+    """Write a scraper's run statistics as JSON into the data directory.
+
+    The target directory is created when missing, and the file is replaced
+    atomically (written to a temporary sibling, then renamed over the target)
+    so a concurrent reader never observes a half-written file.
+
+    Args:
+        filename: File name relative to DATA_DIR, e.g. "stats_sreality.json".
+        stats: JSON-serialisable dict describing the run.
+
+    Raises:
+        OSError: If the directory or file cannot be written.
+        TypeError: If ``stats`` contains values that json cannot serialise.
+    """
+    path = DATA_DIR / filename
+    # Callers also invoke this from failure handlers, so do not assume the
+    # data directory already exists.
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(stats, ensure_ascii=False, indent=2)
+    tmp_path = path.with_name(path.name + ".tmp")
+    tmp_path.write_text(payload, encoding="utf-8")
+    os.replace(tmp_path, path)
--- a/server.py
+++ b/server.py
@@ -0,0 +1,477 @@
+#!/usr/bin/env python3
+"""
+General-purpose HTTP server for maru-hleda-byt.
+
+Serves static files from DATA_DIR and additionally handles:
+  GET  /scrapers-status        → SSR scraper status page
+  GET  /api/ratings            → ratings.json contents
+  POST /api/ratings            → save entire ratings object
+  GET  /api/ratings/export     → same as GET, with download header
+  GET  /api/status             → status.json contents (JSON)
+  GET  /api/status/history     → scraper_history.json contents (JSON)
+"""
+
+from __future__ import annotations
+
+import functools
+import json
+import logging
+import os
+import sys
+from datetime import datetime
+from http.server import HTTPServer, SimpleHTTPRequestHandler
+from pathlib import Path
+
+# TCP port to listen on; overridable through the SERVER_PORT environment variable.
+PORT = int(os.environ.get("SERVER_PORT", 8080))
+# Directory holding the data files; defaults to the current working directory.
+DATA_DIR = Path(os.environ.get("DATA_DIR", "."))
+# Location of the persisted ratings object.
+RATINGS_FILE = DATA_DIR / "ratings.json"
+# Log level taken from LOG_LEVEL; unknown level names fall back to INFO.
+_LOG_LEVEL = getattr(logging, os.environ.get("LOG_LEVEL", "INFO").upper(), logging.INFO)
+
+logging.basicConfig(
+    level=_LOG_LEVEL,
+    format="%(asctime)s [server] %(levelname)s %(message)s",
+    datefmt="%Y-%m-%dT%H:%M:%S",
+)
+log = logging.getLogger(__name__)
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+# Display colour per source, keyed by the lower-cased source name
+# (lookups elsewhere in this module use ``name.lower()``).
+COLORS = {
+    "sreality":    "#1976D2",
+    "realingo":    "#7B1FA2",
+    "bezrealitky": "#E65100",
+    "idnes":       "#C62828",
+    "psn":         "#2E7D32",
+    "cityhome":    "#00838F",
+}
+
+# Czech month names in the genitive case; index 0 is January, so callers
+# index with ``month - 1``.
+MONTHS_CZ = [
+    "ledna", "února", "března", "dubna", "května", "června",
+    "července", "srpna", "září", "října", "listopadu", "prosince",
+]
+
+
+def _load_json(path: Path, default=None):
+    """Parse the JSON file at ``path``.
+
+    Returns ``default`` when the file does not exist or when reading or
+    parsing it raises; the failure is logged as a warning in the latter case.
+    """
+    log.debug("_load_json: %s", path.resolve())
+    try:
+        if not path.exists():
+            return default
+        raw_text = path.read_text(encoding="utf-8")
+        return json.loads(raw_text)
+    except Exception as exc:
+        log.warning("Failed to load %s: %s", path, exc)
+        return default
+
+
+def _fmt_date(iso_str: str) -> str:
+    """Render an ISO timestamp as a Czech date, e.g. "26. února 2026, 09:46".
+
+    If the value cannot be parsed it is returned unchanged.
+    """
+    try:
+        parsed = datetime.fromisoformat(iso_str)
+        month_name = MONTHS_CZ[parsed.month - 1]
+        clock = f"{parsed.hour:02d}:{parsed.minute:02d}"
+        return f"{parsed.day}. {month_name} {parsed.year}, {clock}"
+    except Exception:
+        return iso_str
+
+
+def load_ratings() -> dict:
+    """Return the contents of the ratings file, or ``{}`` if it cannot be loaded."""
+    ratings = _load_json(RATINGS_FILE, default={})
+    return ratings
+
+
+def save_ratings(data: dict) -> None:
+    """Persist the whole ratings object to RATINGS_FILE as pretty-printed JSON.
+
+    The JSON is written to a temporary sibling file and then renamed over
+    the target. An interrupted write therefore cannot leave a truncated
+    ratings file, which would later be read back as "no ratings".
+
+    Args:
+        data: The complete ratings mapping to store.
+
+    Raises:
+        OSError: If the file cannot be written or replaced.
+        TypeError: If ``data`` contains values that json cannot serialise.
+    """
+    payload = json.dumps(data, ensure_ascii=False, indent=2)
+    tmp_file = RATINGS_FILE.with_name(RATINGS_FILE.name + ".tmp")
+    tmp_file.write_text(payload, encoding="utf-8")
+    os.replace(tmp_file, RATINGS_FILE)
+
+
+# ── SSR status page ──────────────────────────────────────────────────────────
+
+_CSS = """\
+* { margin: 0; padding: 0; box-sizing: border-box; }
+body {
+  font-family: system-ui, -apple-system, sans-serif;
+  background: #f5f5f5; color: #333;
+  padding: 24px; max-width: 640px; margin: 0 auto;
+}
+h1 { font-size: 22px; margin-bottom: 4px; }
+.subtitle { color: #888; font-size: 13px; margin-bottom: 24px; }
+.card {
+  background: white; border-radius: 12px; padding: 20px;
+  box-shadow: 0 1px 4px rgba(0,0,0,0.08); margin-bottom: 16px;
+}
+.card h2 { font-size: 15px; margin-bottom: 12px; color: #555; }
+.timestamp { font-size: 28px; font-weight: 700; color: #1976D2; }
+.timestamp-sub { font-size: 13px; color: #999; margin-top: 2px; }
+.summary-row {
+  display: flex; justify-content: space-between; align-items: center;
+  padding: 10px 0; border-bottom: 1px solid #f0f0f0;
+}
+.summary-row:last-child { border-bottom: none; }
+.summary-label { font-size: 13px; color: #666; }
+.summary-value { font-size: 18px; font-weight: 700; }
+.badge {
+  display: inline-block; padding: 2px 8px; border-radius: 4px;
+  font-size: 11px; font-weight: 600; color: white;
+}
+.badge-ok   { background: #4CAF50; }
+.badge-err  { background: #F44336; }
+.badge-skip { background: #FF9800; }
+.bar-row { display: flex; align-items: center; gap: 8px; margin: 4px 0; }
+.bar-track { flex: 1; height: 20px; background: #f0f0f0; border-radius: 4px; overflow: hidden; }
+.bar-fill  { height: 100%; border-radius: 4px; }
+.bar-count { font-size: 12px; width: 36px; font-variant-numeric: tabular-nums; }
+.loader-wrap {
+  display: flex; flex-direction: column; align-items: center;
+  justify-content: center; padding: 60px 0;
+}
+.spinner {
+  width: 40px; height: 40px; border: 4px solid #e0e0e0;
+  border-top-color: #1976D2; border-radius: 50%;
+  animation: spin 0.8s linear infinite;
+}
+@keyframes spin { to { transform: rotate(360deg); } }
+.loader-text { margin-top: 16px; color: #999; font-size: 14px; }
+.link-row { text-align: center; margin-top: 8px; }
+.link-row a { color: #1976D2; text-decoration: none; font-size: 14px; }
+.history-table { width: 100%; border-collapse: collapse; font-size: 12px; }
+.history-table th {
+  text-align: left; font-weight: 600; color: #999; font-size: 11px;
+  padding: 4px 6px 8px 6px; border-bottom: 2px solid #f0f0f0;
+}
+.history-table td { padding: 7px 6px; border-bottom: 1px solid #f5f5f5; vertical-align: middle; }
+.history-table tr:last-child td { border-bottom: none; }
+.history-table tr.latest td { background: #f8fbff; font-weight: 600; }
+.src-nums { display: flex; gap: 4px; flex-wrap: wrap; }
+.src-chip {
+  display: inline-block; padding: 1px 5px; border-radius: 3px;
+  font-size: 10px; color: white; font-variant-numeric: tabular-nums;
+}
+.clickable-row { cursor: pointer; }
+.clickable-row:hover td { background: #f0f7ff !important; }
+/* Modal */
+#md-overlay {
+  position: fixed; inset: 0; background: rgba(0,0,0,0.45);
+  display: flex; align-items: flex-start; justify-content: center;
+  z-index: 1000; padding: 40px 16px; overflow-y: auto;
+}
+#md-box {
+  background: white; border-radius: 12px; padding: 24px;
+  width: 100%; max-width: 620px; position: relative;
+  box-shadow: 0 8px 32px rgba(0,0,0,0.24); margin: auto;
+}
+#md-close {
+  position: absolute; top: 10px; right: 14px;
+  background: none; border: none; font-size: 26px; cursor: pointer;
+  color: #aaa; line-height: 1;
+}
+#md-close:hover { color: #333; }
+#md-box h3 { font-size: 15px; margin-bottom: 14px; padding-right: 24px; }
+.md-summary { display: flex; gap: 20px; flex-wrap: wrap; font-size: 13px; margin-bottom: 16px; color: #555; }
+.md-summary b { color: #333; }
+.detail-table { width: 100%; border-collapse: collapse; font-size: 12px; }
+.detail-table th {
+  text-align: left; color: #999; font-size: 11px; font-weight: 600;
+  padding: 4px 8px 6px 0; border-bottom: 2px solid #f0f0f0; white-space: nowrap;
+}
+.detail-table td { padding: 6px 8px 6px 0; border-bottom: 1px solid #f5f5f5; vertical-align: top; }
+.detail-table tr:last-child td { border-bottom: none; }
+"""
+
+# Source names in display order, and the short labels used for them.
+# The two lists are paired by position (they are zipped together), so they
+# must stay the same length and in the same order.
+_SOURCE_ORDER = ["Sreality", "Realingo", "Bezrealitky", "iDNES", "PSN", "CityHome"]
+_SOURCE_ABBR  = ["Sre", "Rea", "Bez", "iDN", "PSN", "CH"]
+
+
+def _sources_html(sources: list) -> str:
+    """Render the "Zdroje" card: one labelled bar per source.
+
+    Args:
+        sources: List of per-source dicts. The keys read are ``name``,
+            ``accepted``, ``error``, ``excluded`` and ``excluded_total``.
+
+    Returns:
+        An HTML fragment, or an empty string when ``sources`` is empty.
+        Bar widths are relative to the largest ``accepted`` value.
+    """
+    import html  # local import: keeps this block self-contained
+
+    if not sources:
+        return ""
+    # "or 1" guards the percentage division when every source accepted 0.
+    max_count = max((s.get("accepted", 0) for s in sources), default=1) or 1
+    parts = ['<div class="card"><h2>Zdroje</h2>']
+    for s in sources:
+        name = s.get("name", "?")
+        accepted = s.get("accepted", 0)
+        error = s.get("error")
+        exc = s.get("excluded", {})
+        # Prefer the per-reason breakdown; fall back to a precomputed total.
+        excluded_total = sum(exc.values()) if isinstance(exc, dict) else s.get("excluded_total", 0)
+        color = COLORS.get(name.lower(), "#999")
+        pct = round(accepted / max_count * 100) if max_count else 0
+        if error:
+            badge = '<span class="badge badge-err">chyba</span>'
+        elif accepted == 0:
+            badge = '<span class="badge badge-skip">0</span>'
+        else:
+            badge = '<span class="badge badge-ok">OK</span>'
+        # The name comes from a data file, so escape it before embedding in HTML.
+        safe_name = html.escape(str(name))
+        parts.append(
+            f'<div style="margin-bottom:12px;">'
+            f'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">'
+            f'<span style="font-weight:600;font-size:14px;">{safe_name} {badge}</span>'
+            f'<span style="font-size:12px;color:#999;">{excluded_total} vyloučených</span>'
+            f'</div>'
+            f'<div class="bar-row">'
+            f'<div class="bar-track"><div class="bar-fill" style="width:{pct}%;background:{color};"></div></div>'
+            f'<span class="bar-count">{accepted}</span>'
+            f'</div></div>'
+        )
+    parts.append("</div>")
+    return "".join(parts)
+
+
+def _history_html(history: list) -> str:
+    """Render the run-history card as an HTML table, one row per run.
+
+    Args:
+        history: List of run-status dicts. The keys read are ``timestamp``,
+            ``duration_sec``, ``total_accepted``, ``deduplicated``,
+            ``success`` and ``sources``.
+
+    Returns:
+        An HTML fragment, or an empty string when ``history`` is empty.
+    """
+    if not history:
+        return ""
+    # Rows are shown in reverse of the stored order; the first row gets the
+    # "latest" class. NOTE(review): presumably history is stored oldest-first — confirm.
+    rows = list(reversed(history))
+    parts = [
+        '<div class="card">'
+        '<h2>Historie běhů <span style="font-size:11px;font-weight:400;color:#bbb;">– klikni pro detaily</span></h2>',
+        '<table class="history-table"><thead><tr>',
+        '<th>Datum</th><th>Trvání</th><th>Přijato&nbsp;/&nbsp;Dedup</th><th>Zdroje</th><th>OK</th>',
+        '</tr></thead><tbody>',
+    ]
+    for i, entry in enumerate(rows):
+        row_class = ' class="latest clickable-row"' if i == 0 else ' class="clickable-row"'
+        # Index this run's sources by name; entries without a name are ignored.
+        src_map = {s["name"]: s for s in entry.get("sources", []) if "name" in s}
+        # One chip per known source, in fixed order: red when the source
+        # reported an error, "-" as the count when the source is absent.
+        chips = "".join(
+            f'<span class="src-chip" style="background:{"#F44336" if (src_map.get(name) or {}).get("error") else COLORS.get(name.lower(), "#999")}" title="{name}">'
+            f'{abbr}&nbsp;{src_map[name].get("accepted", 0) if name in src_map else "-"}</span>'
+            for name, abbr in zip(_SOURCE_ORDER, _SOURCE_ABBR)
+        )
+        # Only an explicit success == False is shown as an error; a missing
+        # "success" key is rendered as OK.
+        ok_badge = (
+            '<span class="badge badge-err">chyba</span>'
+            if entry.get("success") is False
+            else '<span class="badge badge-ok">OK</span>'
+        )
+        dur = f'{entry["duration_sec"]}s' if entry.get("duration_sec") is not None else "-"
+        # data-idx is the row's position in the reversed list.
+        parts.append(
+            f'<tr{row_class} data-idx="{i}">'
+            f'<td>{_fmt_date(entry.get("timestamp", ""))}</td>'
+            f'<td>{dur}</td>'
+            f'<td>{entry.get("total_accepted", "-")}&nbsp;/&nbsp;{entry.get("deduplicated", "-")}</td>'
+            f'<td><div class="src-nums">{chips}</div></td>'
+            f'<td>{ok_badge}</td>'
+            f'</tr>'
+        )
+    parts.append("</tbody></table></div>")
+    return "".join(parts)
+
+
+def _modal_script(rows_json: str) -> str:
+    """Return the modal overlay HTML + JS for the history detail popup.
+
+    Args:
+        rows_json: JSON text of the history entries; it is embedded as the
+            JavaScript array ``H`` that ``openModal(idx)`` indexes.
+
+    Returns:
+        An HTML fragment: the hidden ``#md-overlay`` element followed by an
+        inline script that opens the modal when a ``tr[data-idx]`` row of
+        ``.history-table`` is clicked and closes it on the close button, a
+        click on the overlay backdrop, or the Escape key.
+    """
+    # In a JSON document "<" can only occur inside string values, so replacing
+    # it with its \u003c escape keeps the data identical once parsed while
+    # making it impossible for a value such as "</script>" to terminate the
+    # inline script element early.
+    safe_json = rows_json.replace("<", "\\u003c")
+    return (
+        '<div id="md-overlay" style="display:none">'
+        '<div id="md-box"><button id="md-close">&times;</button>'
+        '<div id="md-body"></div></div></div>\n'
+        '<script>\n(function(){\n'
+        f'var H={safe_json};\n'
+        'var C={"sreality":"#1976D2","realingo":"#7B1FA2","bezrealitky":"#E65100","idnes":"#C62828","psn":"#2E7D32","cityhome":"#00838F"};\n'
+        'var MN=["ledna","února","března","dubna","května","června","července","srpna","září","října","listopadu","prosince"];\n'
+        # esc(): HTML-escape any value before it is concatenated into innerHTML
+        # or into a double-quoted attribute.
+        'function esc(v){return String(v).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;");}\n'
+        'function fd(s){var d=new Date(s);return d.getDate()+". "+MN[d.getMonth()]+" "+d.getFullYear()+", "+String(d.getHours()).padStart(2,"0")+":"+String(d.getMinutes()).padStart(2,"0");}\n'
+        'function openModal(idx){\n'
+        '  var e=H[idx];\n'
+        # Ignore clicks whose data-idx has no matching history entry.
+        '  if(!e)return;\n'
+        '  var src=e.sources||[];\n'
+        '  var h="<h3>Detaily b\u011bhu \u2013 "+fd(e.timestamp)+"</h3>";\n'
+        '  h+="<div class=\\"md-summary\\">";\n'
+        '  if(e.duration_sec!=null) h+="<span><b>Trvání:</b> "+esc(e.duration_sec)+"s</span>";\n'
+        '  if(e.total_accepted!=null) h+="<span><b>Přijato:</b> "+esc(e.total_accepted)+"</span>";\n'
+        '  if(e.deduplicated!=null) h+="<span><b>Po dedup:</b> "+esc(e.deduplicated)+"</span>";\n'
+        '  h+="</div>";\n'
+        '  h+="<table class=\\"detail-table\\"><thead><tr>";\n'
+        '  h+="<th>Zdroj</th><th>Přijato</th><th>Staženo</th><th>Stránky</th><th>Cache</th><th>Vyloučeno</th><th>Čas</th><th>OK</th>";\n'
+        '  h+="</tr></thead><tbody>";\n'
+        '  src.forEach(function(s){\n'
+        '    var nm=String(s.name||"?"),col=C[nm.toLowerCase()]||"#999";\n'
+        '    var exc=s.excluded||{};\n'
+        '    var excStr=Object.entries(exc).filter(function(kv){return kv[1]>0;}).map(function(kv){return esc(kv[0])+":&nbsp;"+esc(kv[1]);}).join(", ")||"\u2013";\n'
+        # The error text goes into a title="..." attribute, so it must be
+        # escaped or a quote in the message would break the markup.
+        '    var ok=s.error?"<span class=\\"badge badge-err\\" title=\\""+esc(s.error)+"\\">chyba</span>":"<span class=\\"badge badge-ok\\">OK</span>";\n'
+        '    var dot="<span style=\\"display:inline-block;width:8px;height:8px;border-radius:50%;background:"+col+";margin-right:5px;\\"></span>";\n'
+        '    h+="<tr>";\n'
+        '    h+="<td>"+dot+esc(nm)+"</td>";\n'
+        '    h+="<td>"+(s.accepted!=null?esc(s.accepted):"\u2013")+"</td>";\n'
+        '    h+="<td>"+(s.fetched!=null?esc(s.fetched):"\u2013")+"</td>";\n'
+        '    h+="<td>"+(s.pages!=null?esc(s.pages):"\u2013")+"</td>";\n'
+        '    h+="<td>"+(s.cache_hits!=null?esc(s.cache_hits):"\u2013")+"</td>";\n'
+        '    h+="<td style=\\"font-size:11px;color:#666;\\">"+excStr+"</td>";\n'
+        '    h+="<td>"+(s.duration_sec!=null?esc(s.duration_sec)+"s":"\u2013")+"</td>";\n'
+        '    h+="<td>"+ok+"</td></tr>";\n'
+        '  });\n'
+        '  h+="</tbody></table>";\n'
+        '  document.getElementById("md-body").innerHTML=h;\n'
+        '  document.getElementById("md-overlay").style.display="flex";\n'
+        '}\n'
+        'function closeModal(){document.getElementById("md-overlay").style.display="none";}\n'
+        'var tb=document.querySelector(".history-table tbody");\n'
+        'if(tb)tb.addEventListener("click",function(e){var tr=e.target.closest("tr[data-idx]");if(tr)openModal(parseInt(tr.dataset.idx,10));});\n'
+        'document.getElementById("md-close").addEventListener("click",closeModal);\n'
+        'document.getElementById("md-overlay").addEventListener("click",function(e){if(e.target===this)closeModal();});\n'
+        'document.addEventListener("keydown",function(e){if(e.key==="Escape")closeModal();});\n'
+        '})();\n</script>'
+    )
+
+
+def _render_status_html(status: dict | None, history: list, is_running: bool = False) -> str:
+    """Generate the complete HTML page for /scrapers-status.
+
+    Args:
+        status: Parsed content of the latest status file, or ``None`` when it
+            is not available. Anything that is not a dict is treated the same
+            as ``None``.
+        history: Previous run entries; rendered as the history table and, in
+            reversed order, embedded as JSON for the detail modal.
+        is_running: ``True`` while a scrape is in progress.
+
+    Returns:
+        A full HTML document. One of three variants is produced:
+        a "running" page that auto-refreshes every 30 seconds, a
+        "status not available" page, or the full dashboard.
+    """
+    head_open = (
+        '<!DOCTYPE html>\n<html lang="cs">\n<head>\n'
+        '<meta charset="UTF-8">\n'
+        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
+        f'<title>Scraper status</title>\n<style>{_CSS}</style>\n'
+    )
+    page_header = '<h1>Scraper status</h1>\n<div class="subtitle">maru-hleda-byt</div>\n'
+    footer = '<div class="link-row"><a href="/mapa_bytu.html">Otevřít mapu</a></div>'
+
+    def _page(content: str, extra_head: str = "") -> str:
+        """Wrap body content (and optional extra <head> tags) into a document."""
+        return head_open + extra_head + '</head>\n<body>\n' + content + '\n</body>\n</html>'
+
+    # The running state is checked first so that the very first run (lock file
+    # present, no status written yet) shows the auto-refreshing spinner rather
+    # than a static "not available" message.
+    if is_running:
+        return _page(
+            page_header
+            + '<div class="loader-wrap"><div class="spinner"></div>'
+            + '<div class="loader-text">Scraper právě běží…</div></div>\n'
+            + footer,
+            extra_head='<meta http-equiv="refresh" content="30">\n',
+        )
+
+    # A missing status and a malformed one (valid JSON that is not an object)
+    # are both reported as unavailable instead of failing on status.get().
+    if not isinstance(status, dict):
+        return _page(
+            page_header
+            + '<div class="card"><p style="color:#F44336">Status není k dispozici.</p></div>\n'
+            + footer
+        )
+
+    # ── Done state ────────────────────────────────────────────────────────────
+    ts = status.get("timestamp", "")
+    duration = status.get("duration_sec")
+    total_accepted = status.get("total_accepted", 0)
+    deduplicated = status.get("deduplicated")
+
+    ts_card = (
+        '<div class="card"><h2>Poslední scrape</h2>'
+        f'<div class="timestamp">{_fmt_date(ts)}</div>'
+        + (f'<div class="timestamp-sub">Trvání: {round(duration)}s</div>' if duration is not None else "")
+        + '</div>'
+    )
+
+    sum_card = (
+        '<div class="card"><h2>Souhrn</h2>'
+        f'<div class="summary-row"><span class="summary-label">Vyhovujících bytů</span>'
+        f'<span class="summary-value" style="color:#4CAF50">{total_accepted}</span></div>'
+        + (
+            f'<div class="summary-row"><span class="summary-label">Po deduplikaci (v mapě)</span>'
+            f'<span class="summary-value" style="color:#1976D2">{deduplicated}</span></div>'
+            if deduplicated is not None else ""
+        )
+        + '</div>'
+    )
+
+    # The modal script receives the history in reversed order.
+    rows_for_js = list(reversed(history))
+    body = (
+        page_header
+        + ts_card + "\n"
+        + sum_card + "\n"
+        + _sources_html(status.get("sources", [])) + "\n"
+        + _history_html(history) + "\n"
+        + footer
+    )
+    modal = _modal_script(json.dumps(rows_for_js, ensure_ascii=False))
+    return _page(body + '\n' + modal)
+
+
+# ── HTTP handler ──────────────────────────────────────────────────────────────
+
+class Handler(SimpleHTTPRequestHandler):
+    """Request handler for the JSON API, the status page and static files.
+
+    Routes:
+        GET  /api/ratings, /api/ratings/export  -> stored ratings as JSON
+        GET  /api/status                        -> latest status JSON (404 if missing)
+        GET  /api/status/history                -> list of history entries
+        GET  /scrapers-status                   -> server-rendered status page
+        POST /api/ratings                       -> replace stored ratings
+        anything else                           -> static file handling of the base class
+
+    Routing ignores the query string and fragment, so cache-busting URLs such
+    as ``/api/status?t=123`` reach the same endpoint as the bare path.
+    """
+
+    def log_message(self, format, *args):
+        pass  # suppress default access log; use our own where needed
+
+    def _route_path(self) -> str:
+        """Return the request path with any query string or fragment removed."""
+        return self.path.split("?", 1)[0].split("#", 1)[0]
+
+    def _send_json(self, status: int, body, extra_headers=None):
+        """Send ``body`` serialized as UTF-8 JSON with permissive CORS headers.
+
+        Args:
+            status: HTTP status code of the response.
+            body: Any value ``json.dumps`` can serialize.
+            extra_headers: Optional mapping of additional header names to values.
+        """
+        payload = json.dumps(body, ensure_ascii=False).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json; charset=utf-8")
+        self.send_header("Content-Length", str(len(payload)))
+        self.send_header("Access-Control-Allow-Origin", "*")
+        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+        self.send_header("Access-Control-Allow-Headers", "Content-Type")
+        if extra_headers:
+            for k, v in extra_headers.items():
+                self.send_header(k, v)
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def do_OPTIONS(self):
+        """Answer CORS preflight requests with an empty 204 response."""
+        self.send_response(204)
+        self.send_header("Access-Control-Allow-Origin", "*")
+        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+        self.send_header("Access-Control-Allow-Headers", "Content-Type")
+        self.end_headers()
+
+    def do_GET(self):
+        """Dispatch GET requests to the API, the status page or static files."""
+        route = self._route_path()
+        if route.startswith("/api/"):
+            self._handle_api_get()
+        elif route.rstrip("/") == "/scrapers-status":
+            self._serve_status_page()
+        else:
+            log.debug("GET %s → static file: %s", self.path, self.translate_path(self.path))
+            super().do_GET()
+
+    def _handle_api_get(self):
+        """Serve the read-only JSON endpoints under /api/."""
+        route = self._route_path()
+        if route in ("/api/ratings", "/api/ratings/export"):
+            ratings = load_ratings()
+            extra = None
+            if route == "/api/ratings/export":
+                # Make browsers download the export instead of displaying it.
+                extra = {"Content-Disposition": 'attachment; filename="ratings.json"'}
+            log.info("GET %s → %d ratings", self.path, len(ratings))
+            self._send_json(200, ratings, extra)
+        elif route == "/api/status":
+            data = _load_json(DATA_DIR / "status.json")
+            if data is None:
+                self._send_json(404, {"error": "status not available"})
+                return
+            log.info("GET /api/status → ok")
+            self._send_json(200, data)
+        elif route == "/api/status/history":
+            data = _load_json(DATA_DIR / "scraper_history.json", default=[])
+            if not isinstance(data, list):
+                data = []
+            log.info("GET /api/status/history → %d entries", len(data))
+            self._send_json(200, data)
+        else:
+            self._send_json(404, {"error": "not found"})
+
+    def _serve_status_page(self):
+        """Render and send the HTML status page."""
+        status = _load_json(DATA_DIR / "status.json")
+        history = _load_json(DATA_DIR / "scraper_history.json", default=[])
+        if not isinstance(history, list):
+            history = []
+        # The existence of this file is what marks a scrape as in progress.
+        is_running = (DATA_DIR / "scraper_running.json").exists()
+        html = _render_status_html(status, history, is_running)
+        payload = html.encode("utf-8")
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html; charset=utf-8")
+        self.send_header("Content-Length", str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def do_POST(self):
+        """Handle POST /api/ratings: validate the JSON object body and save it."""
+        if self._route_path() == "/api/ratings":
+            try:
+                length = int(self.headers.get("Content-Length", 0))
+            except (TypeError, ValueError):
+                length = -1
+            if length < 0:
+                # A malformed header must not raise, and a negative value must
+                # never reach rfile.read(), where it would block until EOF.
+                self._send_json(400, {"error": "invalid Content-Length"})
+                return
+            if length == 0:
+                self._send_json(400, {"error": "empty body"})
+                return
+            try:
+                raw = self.rfile.read(length)
+                data = json.loads(raw.decode("utf-8"))
+            except Exception as e:
+                log.warning("Bad request body: %s", e)
+                self._send_json(400, {"error": "invalid JSON"})
+                return
+            if not isinstance(data, dict):
+                self._send_json(400, {"error": "expected JSON object"})
+                return
+            save_ratings(data)
+            log.info("POST /api/ratings → saved %d ratings", len(data))
+            self._send_json(200, {"ok": True, "count": len(data)})
+        else:
+            self._send_json(404, {"error": "not found"})
+
+
+if __name__ == "__main__":
+    log.info("Server starting on port %d, data dir: %s", PORT, DATA_DIR)
+    # Bind the handler to DATA_DIR so static requests are resolved there.
+    handler = functools.partial(Handler, directory=str(DATA_DIR))
+    # Using the server as a context manager closes the listening socket on
+    # every exit path, including Ctrl-C.
+    with HTTPServer(("0.0.0.0", PORT), handler) as server:
+        try:
+            server.serve_forever()
+        except KeyboardInterrupt:
+            log.info("Stopped.")
+            sys.exit(0)
--- a/status.html
+++ b/status.html
@@ -1,204 +0,0 @@
-<!DOCTYPE html>
-<html lang="cs">
-<head>
-<meta charset="UTF-8">
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>Scraper status</title>
-<style>
-  * { margin: 0; padding: 0; box-sizing: border-box; }
-  body {
-    font-family: system-ui, -apple-system, sans-serif;
-    background: #f5f5f5; color: #333;
-    padding: 24px; max-width: 640px; margin: 0 auto;
-  }
-  h1 { font-size: 22px; margin-bottom: 4px; }
-  .subtitle { color: #888; font-size: 13px; margin-bottom: 24px; }
-  .card {
-    background: white; border-radius: 12px; padding: 20px;
-    box-shadow: 0 1px 4px rgba(0,0,0,0.08); margin-bottom: 16px;
-  }
-  .card h2 { font-size: 15px; margin-bottom: 12px; color: #555; }
-  .timestamp {
-    font-size: 28px; font-weight: 700; color: #1976D2;
-  }
-  .timestamp-ago { font-size: 13px; color: #999; margin-top: 2px; }
-
-  /* Source table */
-  .source-table { width: 100%; border-collapse: collapse; }
-  .source-table td { padding: 8px 0; border-bottom: 1px solid #f0f0f0; font-size: 14px; }
-  .source-table tr:last-child td { border-bottom: none; }
-  .source-table .name { font-weight: 600; }
-  .source-table .count { text-align: right; font-variant-numeric: tabular-nums; }
-  .source-table .rejected { text-align: right; color: #999; font-size: 12px; }
-  .badge {
-    display: inline-block; padding: 2px 8px; border-radius: 4px;
-    font-size: 11px; font-weight: 600; color: white;
-  }
-  .badge-ok { background: #4CAF50; }
-  .badge-err { background: #F44336; }
-  .badge-skip { background: #FF9800; }
-
-  /* Summary bar */
-  .summary-row {
-    display: flex; justify-content: space-between; align-items: center;
-    padding: 10px 0; border-bottom: 1px solid #f0f0f0;
-  }
-  .summary-row:last-child { border-bottom: none; }
-  .summary-label { font-size: 13px; color: #666; }
-  .summary-value { font-size: 18px; font-weight: 700; }
-
-  /* Source bar chart */
-  .bar-row { display: flex; align-items: center; gap: 8px; margin: 4px 0; }
-  .bar-label { width: 90px; font-size: 12px; text-align: right; color: #666; }
-  .bar-track { flex: 1; height: 20px; background: #f0f0f0; border-radius: 4px; overflow: hidden; position: relative; }
-  .bar-fill { height: 100%; border-radius: 4px; transition: width 0.5s ease; }
-  .bar-count { font-size: 12px; width: 36px; font-variant-numeric: tabular-nums; }
-
-  /* Loader */
-  .loader-wrap {
-    display: flex; flex-direction: column; align-items: center;
-    justify-content: center; padding: 60px 0;
-  }
-  .spinner {
-    width: 40px; height: 40px; border: 4px solid #e0e0e0;
-    border-top-color: #1976D2; border-radius: 50%;
-    animation: spin 0.8s linear infinite;
-  }
-  @keyframes spin { to { transform: rotate(360deg); } }
-  .loader-text { margin-top: 16px; color: #999; font-size: 14px; }
-
-  .error-msg { color: #F44336; padding: 40px 0; text-align: center; }
-  .link-row { text-align: center; margin-top: 8px; }
-  .link-row a { color: #1976D2; text-decoration: none; font-size: 14px; }
-</style>
-</head>
-<body>
-
-<h1>Scraper status</h1>
-<div class="subtitle">maru-hleda-byt</div>
-
-<div id="content">
-  <div class="loader-wrap">
-    <div class="spinner"></div>
-    <div class="loader-text">Nacitam status...</div>
-  </div>
-</div>
-
-<div class="link-row"><a href="mapa_bytu.html">Otevrit mapu</a></div>
-
-<script>
-var COLORS = {
-  sreality: '#1976D2',
-  realingo: '#7B1FA2',
-  bezrealitky: '#E65100',
-  idnes: '#C62828',
-  psn: '#2E7D32',
-  cityhome: '#00838F',
-};
-
-function timeAgo(dateStr) {
-  var d = new Date(dateStr);
-  var now = new Date();
-  var diff = Math.floor((now - d) / 1000);
-  if (diff < 60) return 'prave ted';
-  if (diff < 3600) return Math.floor(diff / 60) + ' min zpet';
-  if (diff < 86400) return Math.floor(diff / 3600) + ' hod zpet';
-  return Math.floor(diff / 86400) + ' dni zpet';
-}
-
-function formatDate(dateStr) {
-  var d = new Date(dateStr);
-  var day = d.getDate();
-  var months = ['ledna','unora','brezna','dubna','kvetna','cervna',
-    'cervence','srpna','zari','rijna','listopadu','prosince'];
-  var hh = String(d.getHours()).padStart(2, '0');
-  var mm = String(d.getMinutes()).padStart(2, '0');
-  return day + '. ' + months[d.getMonth()] + ' ' + d.getFullYear() + ', ' + hh + ':' + mm;
-}
-
-function render(data) {
-  // Check if scrape is currently running
-  if (data.status === 'running') {
-    document.getElementById('content').innerHTML =
-      '<div class="loader-wrap">' +
-      '<div class="spinner"></div>' +
-      '<div class="loader-text">Scraper prave bezi...</div>' +
-      '</div>';
-    setTimeout(loadStatus, 30000);
-    return;
-  }
-
-  var sources = data.sources || [];
-  var totalOk = 0, totalRej = 0;
-  var maxCount = 0;
-  sources.forEach(function(s) {
-    totalOk += s.accepted || 0;
-    totalRej += s.rejected || 0;
-    if (s.accepted > maxCount) maxCount = s.accepted;
-  });
-
-  var html = '';
-
-  // Timestamp card
-  html += '<div class="card">';
-  html += '<h2>Posledni scrape</h2>';
-  html += '<div class="timestamp">' + formatDate(data.timestamp) + '</div>';
-  html += '<div class="timestamp-ago">' + timeAgo(data.timestamp) + '</div>';
-  if (data.duration_sec) {
-    html += '<div class="timestamp-ago">Trvani: ' + Math.round(data.duration_sec) + 's</div>';
-  }
-  html += '</div>';
-
-  // Summary card
-  html += '<div class="card">';
-  html += '<h2>Souhrn</h2>';
-  html += '<div class="summary-row"><span class="summary-label">Vyhovujicich bytu</span><span class="summary-value" style="color:#4CAF50">' + totalOk + '</span></div>';
-  html += '<div class="summary-row"><span class="summary-label">Vyloucenych</span><span class="summary-value" style="color:#999">' + totalRej + '</span></div>';
-  if (data.deduplicated !== undefined) {
-    html += '<div class="summary-row"><span class="summary-label">Po deduplikaci (v mape)</span><span class="summary-value" style="color:#1976D2">' + data.deduplicated + '</span></div>';
-  }
-  html += '</div>';
-
-  // Sources card
-  html += '<div class="card">';
-  html += '<h2>Zdroje</h2>';
-  sources.forEach(function(s) {
-    var color = COLORS[s.name.toLowerCase()] || '#999';
-    var pct = maxCount > 0 ? Math.round((s.accepted / maxCount) * 100) : 0;
-    var badge = s.error
-      ? '<span class="badge badge-err">chyba</span>'
-      : (s.accepted === 0 ? '<span class="badge badge-skip">0</span>' : '<span class="badge badge-ok">OK</span>');
-
-    html += '<div style="margin-bottom:12px;">';
-    html += '<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">';
-    html += '<span style="font-weight:600;font-size:14px;">' + s.name + ' ' + badge + '</span>';
-    html += '<span style="font-size:12px;color:#999;">' + (s.rejected || 0) + ' vyloucenych</span>';
-    html += '</div>';
-    html += '<div class="bar-row">';
-    html += '<div class="bar-track"><div class="bar-fill" style="width:' + pct + '%;background:' + color + ';"></div></div>';
-    html += '<span class="bar-count">' + (s.accepted || 0) + '</span>';
-    html += '</div>';
-    html += '</div>';
-  });
-  html += '</div>';
-
-  document.getElementById('content').innerHTML = html;
-}
-
-function loadStatus() {
-  fetch('status.json?t=' + Date.now())
-    .then(function(r) {
-      if (!r.ok) throw new Error(r.status);
-      return r.json();
-    })
-    .then(render)
-    .catch(function(err) {
-      document.getElementById('content').innerHTML =
-        '<div class="error-msg">Status zatim neni k dispozici.<br><small>(' + err.message + ')</small></div>';
-    });
-}
-
-loadStatus();
-</script>
-</body>
-</html>
Author	SHA1	Message	Date
Marie Michalova	7d3021efbf	Remove tracked generated/data files and fix map link on status page - Remove byty_*.json, mapa_bytu.html, .DS_Store and settings.local.json from git tracking (already in .gitignore, files kept locally) - Fix "Otevřít mapu" link on scraper status page: / → /mapa_bytu.html Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-02-26 18:50:16 +01:00
kacerr	23d208a5b7	Merge pull request 'Add scraper status collection and presentation' (#3 ) from add-scraper-statuses into main Reviewed-on: #3	2026-02-26 09:04:23 +00:00
Jan Novak	00c9144010	Fix DATA_DIR usage in stats/history paths, set env in Dockerfile, add validation docs All checks were successful Build and Push / build (push) Successful in 5s Details - scraper_stats.py: respect DATA_DIR env var when writing stats_*.json files - generate_status.py: read stats files and write history from DATA_DIR instead of HERE - build/Dockerfile: set DATA_DIR=/app/data as default env var - docs/validation.md: end-to-end Docker validation recipe Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-02-26 09:46:16 +01:00
Jan Novak	44c02b45b4	Increase history retention to 20, run scrapers every 4 hours All checks were successful Build and Push / build (push) Successful in 7s Details - generate_status.py: raise --keep default from 5 to 20 entries - build/crontab: change schedule from 06:00/18:00 to every 4 hours (*/4) covers 6 runs/day ≈ 3.3 days of history at default retention Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-02-26 08:53:27 +01:00
Jan Novak	5fb3b984b6	Add status dashboard, server, scraper stats, and DATA_DIR support All checks were successful Build and Push / build (push) Successful in 7s Details Key changes: - Replace ratings_server.py + status.html with a unified server.py that serves the map, scraper status dashboard, and ratings API in one process - Add scraper_stats.py utility: each scraper writes per-run stats (fetched, accepted, excluded, duration) to stats_<source>.json for the status page - generate_status.py: respect DATA_DIR env var so status.json lands in the configured data directory instead of always the project root - run_all.sh: replace the {"status":"running"} overwrite of status.json with a dedicated scraper_running.json lock file; trap on EXIT ensures cleanup even on kill/error, preventing the previous run's results from being wiped - server.py: detect running state via scraper_running.json existence instead of status["status"] field, eliminating the dual-use race condition - Makefile: add serve (local dev), debug (Docker debug container) targets; add SERVER_PORT variable - build/Dockerfile + entrypoint.sh: switch to server.py, set DATA_DIR, adjust volume mounts - .gitignore: add .json and .log to keep runtime data files out of VCS - mapa_bytu.html: price-per-m² colouring, status link, UX tweaks Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-02-26 00:30:25 +01:00
kacerr	6f49533c94	Merge pull request 'Rewrite PSN + CityHome scrapers, add price/m² map coloring, ratings system, and status dashboard' (#2 ) from ui-tweaks/2026-02-17 into main Reviewed-on: #2	2026-02-25 21:26:51 +00:00