17 Commits

8c052840cd Move Realingo scraper to run last in pipeline
Reorder scrapers: Sreality → Bezrealitky → iDNES → PSN+CityHome → Realingo → Merge

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 21:35:54 +01:00
39e4b9ce2a Merge pull request 'Reliability improvements and cleanup' (#5) from improve/reliability-and-fixes into main
Reviewed-on: #5
2026-02-27 10:26:04 +00:00
Jan Novak
fd3991f8d6 Remove regen_map.py references from Dockerfile and README
All checks were successful
Build and Push / build (push) Successful in 6s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 10:44:08 +01:00
Jan Novak
27a7834eb6 Reliability improvements: retry logic, validation, ratings sync
Some checks failed
Build and Push / build (push) Failing after 4s
- Add 3-attempt retry with exponential backoff to Sreality, Realingo,
  Bezrealitky, and PSN scrapers (CityHome and iDNES already had it)
- Add shared validate_listing() in scraper_stats.py; all 6 scrapers now
  validate GPS bounds, price, area, and required fields before output
- Wire ratings to server /api/ratings on page load (merge with
  localStorage) and save (async POST); ratings now persist across
  browsers and devices
- Namespace JS hash IDs as {source}_{id} to prevent rating collisions
  between listings from different portals with the same numeric ID
- Replace manual Czech diacritic table with unicodedata.normalize()
  in merge_and_map.py for correct deduplication of all edge cases
- Correct README schedule docs: every 4 hours, not twice daily
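The retry behavior from the first bullet can be sketched as follows; `fetch_with_retry` and its parameters are illustrative names, not the actual scraper code, and the real delays may differ:

```python
import time

def fetch_with_retry(fetch, attempts=3, base_delay=2.0, sleep=time.sleep):
    """Call fetch() up to `attempts` times with exponential backoff.

    Hedged sketch of the 3-attempt retry described in the commit;
    the injectable `sleep` makes the backoff testable.
    """
    for attempt in range(1, attempts + 1):
        try:
            return fetch()
        except Exception:
            if attempt == attempts:
                raise  # all attempts exhausted: propagate to the caller
            sleep(base_delay * 2 ** (attempt - 1))  # 2s, 4s, ...
```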

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 10:36:37 +01:00
57a9f6f21a Add NEW badge for recent listings, text input for price filter, cleanup
- New listings (≤1 day) show yellow NEW badge instead of oversized marker
- Price filter changed from dropdown to text input (max 14M)
- Cap price filter at 14M in JS
- Remove unused regen_map.py
- Remove unused HTMLParser import in scrape_idnes.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 21:14:48 +01:00
0ea31d3013 Remove tracked generated/data files and fix map link on status page
- Remove byty_*.json, mapa_bytu.html, .DS_Store and settings.local.json from git tracking
  (already in .gitignore, files kept locally)
- Fix "Otevřít mapu" link on scraper status page: / → /mapa_bytu.html

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 20:42:35 +01:00
Jan Novak
4304a42776 Track first_seen/last_changed per property, add map filters and clickable legend
All checks were successful
Build and Push / build (push) Successful in 6s
Scraper changes (all 6 sources):
- Add first_seen: date the hash_id was first scraped, never overwritten
- Add last_changed: date the price last changed (= first_seen when new)
- PSN and CityHome load previous output as a lightweight cache to compute these fields
- merge_and_map.py preserves earliest first_seen when deduplicating cross-source duplicates

Map popup:
- Show "Přidáno: YYYY-MM-DD" and "Změněno: YYYY-MM-DD" in each property popup
- NOVÉ badge and pulsing marker now driven by first_seen == today (more accurate than scraped_at)

Map filters (sidebar):
- New "Přidáno / změněno" dropdown: 1, 2, 3, 4, 5, 7, 14, 30 days or all
- Clickable price/m² legend bands: click to filter to that band, multi-select supported
- "✕ Zobrazit všechny ceny" reset link appears when any band is active
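The first_seen/last_changed bookkeeping described above can be sketched like this; the field names come from the commit, but `track_dates` itself is an illustrative helper, not the actual scraper code:

```python
from datetime import date

def track_dates(listing, previous, today=None):
    """Fill first_seen/last_changed on a freshly scraped listing,
    given the record from the previous run (or None for a new listing)."""
    today = today or date.today().isoformat()
    if previous is None:
        listing["first_seen"] = today
        listing["last_changed"] = today  # == first_seen when new
    else:
        listing["first_seen"] = previous["first_seen"]  # never overwritten
        if listing["price"] != previous["price"]:
            listing["last_changed"] = today  # price changed this run
        else:
            listing["last_changed"] = previous.get("last_changed",
                                                   previous["first_seen"])
    return listing
```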

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 16:58:46 +01:00
23d208a5b7 Merge pull request 'Add scraper status collection and presentation' (#3) from add-scraper-statuses into main
Reviewed-on: #3
2026-02-26 09:04:23 +00:00
Jan Novak
00c9144010 Fix DATA_DIR usage in stats/history paths, set env in Dockerfile, add validation docs
All checks were successful
Build and Push / build (push) Successful in 5s
- scraper_stats.py: respect DATA_DIR env var when writing stats_*.json files
- generate_status.py: read stats files and write history from DATA_DIR instead of HERE
- build/Dockerfile: set DATA_DIR=/app/data as default env var
- docs/validation.md: end-to-end Docker validation recipe

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 09:46:16 +01:00
Jan Novak
44c02b45b4 Increase history retention to 20, run scrapers every 4 hours
All checks were successful
Build and Push / build (push) Successful in 7s
- generate_status.py: raise --keep default from 5 to 20 entries
- build/crontab: change schedule from 06:00/18:00 to every 4 hours (*/4)
  covers 6 runs/day ≈ 3.3 days of history at default retention

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 08:53:27 +01:00
Jan Novak
5fb3b984b6 Add status dashboard, server, scraper stats, and DATA_DIR support
All checks were successful
Build and Push / build (push) Successful in 7s
Key changes:
- Replace ratings_server.py + status.html with a unified server.py that
  serves the map, scraper status dashboard, and ratings API in one process
- Add scraper_stats.py utility: each scraper writes per-run stats (fetched,
  accepted, excluded, duration) to stats_<source>.json for the status page
- generate_status.py: respect DATA_DIR env var so status.json lands in the
  configured data directory instead of always the project root
- run_all.sh: replace the {"status":"running"} overwrite of status.json with
  a dedicated scraper_running.json lock file; trap on EXIT ensures cleanup
  even on kill/error, preventing the previous run's results from being wiped
- server.py: detect running state via scraper_running.json existence instead
  of status["status"] field, eliminating the dual-use race condition
- Makefile: add serve (local dev), debug (Docker debug container) targets;
  add SERVER_PORT variable
- build/Dockerfile + entrypoint.sh: switch to server.py, set DATA_DIR,
  adjust volume mounts
- .gitignore: add *.json and *.log to keep runtime data files out of VCS
- mapa_bytu.html: price-per-m² colouring, status link, UX tweaks
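The scraper_running.json lock-file pattern described above can be sketched as a few lines of bash; the paths and JSON payload are illustrative, not the actual run_all.sh:

```shell
#!/bin/bash
# Sketch of the scraper_running.json lock described above; paths and the
# JSON payload are illustrative, not the actual run_all.sh.
set -euo pipefail

DATA_DIR="${DATA_DIR:-$(mktemp -d)}"
LOCK_FILE="$DATA_DIR/scraper_running.json"

run_pipeline() {
    # The EXIT trap fires on normal end, error, or kill, so the lock file
    # never outlives the run and status.json is never clobbered
    trap 'rm -f "$LOCK_FILE"' EXIT
    printf '{"started": "%s"}\n' "$(date -Iseconds)" > "$LOCK_FILE"
    # ... scrapers + merge would run here; the server treats the file's
    # existence as "scrape in progress" ...
    [ -f "$LOCK_FILE" ]
}

# Run in a subshell so the trap fires when the pipeline finishes
( run_pipeline )
```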

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 00:30:25 +01:00
6f49533c94 Merge pull request 'Rewrite PSN + CityHome scrapers, add price/m² map coloring, ratings system, and status dashboard' (#2) from ui-tweaks/2026-02-17 into main
Reviewed-on: #2
2026-02-25 21:26:51 +00:00
b8d4d44164 Rewrite PSN + CityHome scrapers, add price/m² map coloring, ratings system, and status dashboard
- Rewrite PSN scraper to use /api/units-list endpoint (single API call, no HTML parsing)
- Fix CityHome scraper: GPS from multiple URL patterns, address from table cells, no 404 retries
- Color map markers by price/m² instead of disposition (blue→green→orange→red scale)
- Add persistent rating system (favorite/reject) with Flask ratings server and localStorage fallback
- Rejected markers show original color at reduced opacity with 🚫 SVG overlay
- Favorite markers shown as star icons with gold pulse animation
- Add "new today" marker logic (scraped_at == today) with larger pulsing green outline
- Add filter panel with floor, price, hide-rejected controls and ☰/✕ toggle buttons
- Add generate_status.py for scraper run statistics and status.html dashboard
- Add scraped_at field to all scrapers for freshness tracking
- Update run_all.sh with log capture and status generation
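The price/m² coloring can be illustrated with a small bucketing helper; the thresholds here are invented for the example (the actual map uses its own blue→green→orange→red scale):

```python
def price_band(price, area):
    """Map price per m² to a coarse legend color.

    Illustrative thresholds only; the real map defines its own scale.
    """
    ppm = price / area
    if ppm < 130_000:
        return "blue"    # cheap per m²
    if ppm < 160_000:
        return "green"
    if ppm < 190_000:
        return "orange"
    return "red"         # expensive per m²
```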

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 15:15:25 +01:00
Jan Novak
c6089f0da9 Add Gitea Actions CI pipeline for Docker image builds
Triggers on tag push or manual dispatch. Builds the image using
build/Dockerfile and pushes to the Gitea container registry.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 22:40:23 +00:00
Jan Novak
327688d9d2 Add comprehensive project documentation
Cover the full pipeline (scrapers, merge, map generation), all 6 data
sources with their parsing methods, filter criteria, CLI arguments,
Docker setup, caching, rate limiting, and project structure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 22:40:23 +00:00
Jan Novak
09a853aa05 Add validation mode, structured logging, and CLI args to all scrapers
- Replace print() with Python logging module across all 6 scrapers
  for configurable log levels (DEBUG/INFO/WARNING/ERROR)
- Add --max-pages, --max-properties, and --log-level CLI arguments
  to each scraper via argparse for limiting scrape scope
- Add validation Make targets (validation, validation-local,
  validation-local-debug) for quick test runs with limited data
- Update run_all.sh to parse and forward CLI args to all scrapers
- Update mapa_bytu.html with latest scrape results

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 22:40:23 +00:00
Jan Novak
5207c48890 Add Docker build, Makefile, and assorted groundwork before moving forward 2026-02-14 22:40:23 +00:00
34 changed files with 2893 additions and 32790 deletions

.DS_Store (binary, removed)

settings.local.json (removed)

@@ -1,31 +0,0 @@
{
"permissions": {
"allow": [
"WebFetch(domain:github.com)",
"WebFetch(domain:www.sreality.cz)",
"WebFetch(domain:webscraping.pro)",
"WebFetch(domain:raw.githubusercontent.com)",
"Bash(python3:*)",
"Bash(open:*)",
"WebFetch(domain:www.realingo.cz)",
"WebFetch(domain:api.realingo.cz)",
"Bash(curl:*)",
"Bash(grep:*)",
"WebFetch(domain:www.realitni-pes.cz)",
"WebFetch(domain:www.bezrealitky.cz)",
"WebFetch(domain:apify.com)",
"WebFetch(domain:www.bezrealitky.com)",
"WebFetch(domain:reality.idnes.cz)",
"Bash(# Final checks: robots.txt and response time for rate limiting clues curl -s -L -H \"\"User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/120.0.0.0 Safari/537.36\"\" \"\"https://reality.idnes.cz/robots.txt\"\")",
"WebFetch(domain:www.cityhome.cz)",
"WebFetch(domain:www.psn.cz)",
"WebFetch(domain:www.city-home.cz)",
"WebFetch(domain:psn.cz)",
"WebFetch(domain:api.psn.cz)",
"Bash(done)",
"Bash(# Final summary: count total units across all projects\n# Get the total count from the unitsCountData we already extracted\necho \"\"From unitsCountData on /prodej page:\"\"\necho \"\" type_id 0 \\(Prodej bytů a ateliérů\\): 146\"\"\necho \"\" type_id 1 \\(Prodej komerčních nemovitostí\\): 14\"\"\necho \"\" type_id 2 \\(Pronájem bytů\\): 3\"\"\necho \"\" type_id 3 \\(Pronájem komerčních nemovitostí\\): 48\"\"\necho \"\"\"\"\necho \"\"Total for-sale projects: 19\"\"\necho \"\"\"\"\necho \"\"Disposition counts from the data:\"\"\npython3 << 'PYEOF'\n# Extract disposition counts from prodej page\nimport re\n\nwith open\\('/tmp/psn_prodej_p1.html', 'r', encoding='utf-8'\\) as f:\n html = f.read\\(\\)\n\n# Find disposition data\nidx = html.find\\('\\\\\\\\\"disposition\\\\\\\\\":['\\)\nif idx >= 0:\n chunk = html[idx:idx+2000].replace\\('\\\\\\\\\"', '\"'\\)\n # Extract name and count pairs\n import re\n pairs = re.findall\\(r'\"name\":\"\\([^\"]+\\)\",\"count\":\\(\\\\d+\\)', chunk\\)\n for name, count in pairs:\n print\\(f\" {name}: {count}\"\\)\nPYEOF)",
"Bash(ls:*)",
"Bash(chmod:*)"
]
}
}

Build and Push workflow (new file)

@@ -0,0 +1,35 @@
name: Build and Push
on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Image tag'
        required: true
        default: 'latest'
  push:
    tags:
      - '*'
jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4
      - name: Login to Gitea registry
        run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login -u ${{ github.actor }} --password-stdin gitea.home.hrajfrisbee.cz
      - name: Build and push
        run: |
          TAG=${{ github.ref_name }}
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            TAG=${{ inputs.tag }}
          fi
          IMAGE=gitea.home.hrajfrisbee.cz/${{ github.repository }}:$TAG
          docker build -f build/Dockerfile -t $IMAGE .
          docker push $IMAGE

.gitignore (new file)

@@ -0,0 +1,8 @@
.vscode/
__pycache__/
.DS_Store
byty_*.json
*.json
*.log
mapa_bytu.html

Makefile (new file)

@@ -0,0 +1,99 @@
IMAGE_NAME := maru-hleda-byt
CONTAINER_NAME := maru-hleda-byt
VOLUME_NAME := maru-hleda-byt-data
VALIDATION_CONTAINER := maru-hleda-byt-validation
VALIDATION_VOLUME := maru-hleda-byt-validation-data
DEBUG_CONTAINER := maru-hleda-byt-debug
DEBUG_VOLUME := maru-hleda-byt-debug-data
DEBUG_PORT ?= 8082
PORT := 8080
SERVER_PORT ?= 8080
.PHONY: build run stop logs scrape restart clean help serve validation validation-local validation-stop validation-local-debug debug debug-stop
help:
	@echo "Available targets:"
	@echo "  build                  - Build the Docker image"
	@echo "  run                    - Build and run the Docker container in the background"
	@echo "  stop                   - Stop and remove the running container"
	@echo "  logs                   - Show live container logs"
	@echo "  scrape                 - Run the scraping script inside the container"
	@echo "  validation             - Run scraping with limits (1 page, 10 properties) in Docker container"
	@echo "  validation-stop        - Stop the validation Docker container"
	@echo "  validation-local       - Run scraping with limits (1 page, 10 properties) locally with Python"
	@echo "  validation-local-debug - Run validation locally with DEBUG logging"
	@echo "  restart                - Restart the container (stop and run again)"
	@echo "  clean                  - Stop container and remove the Docker image"
	@echo "  serve                  - Start server.py locally on port 8080"
	@echo "  debug                  - Build and run debug Docker container with limited scrape (port $(DEBUG_PORT))"
	@echo "  debug-stop             - Stop and remove the debug Docker container"
	@echo "  help                   - Show this help message"

build:
	docker build -f build/Dockerfile -t $(IMAGE_NAME) .

run: build
	docker run -d --name $(CONTAINER_NAME) \
		-p $(PORT):8080 \
		-v $(VOLUME_NAME):/app/data \
		--restart unless-stopped \
		$(IMAGE_NAME)
	@echo "Map will be at http://localhost:$(PORT)/mapa_bytu.html"

stop:
	docker stop $(CONTAINER_NAME) && docker rm $(CONTAINER_NAME)

logs:
	docker logs -f $(CONTAINER_NAME)

scrape:
	docker exec $(CONTAINER_NAME) bash /app/run_all.sh

validation: build
	@docker stop $(VALIDATION_CONTAINER) 2>/dev/null || true
	@docker rm $(VALIDATION_CONTAINER) 2>/dev/null || true
	docker run -d --name $(VALIDATION_CONTAINER) \
		-p 8081:8080 \
		-v $(VALIDATION_VOLUME):/app/data \
		--restart unless-stopped \
		$(IMAGE_NAME)
	@sleep 2
	docker exec $(VALIDATION_CONTAINER) bash /app/run_all.sh --max-pages 1 --max-properties 10
	@echo "Validation map will be at http://localhost:8081/mapa_bytu.html"

validation-stop:
	@docker stop $(VALIDATION_CONTAINER) 2>/dev/null || true
	@docker rm $(VALIDATION_CONTAINER) 2>/dev/null || true
	@echo "Validation container stopped and removed"

debug: build
	@docker stop $(DEBUG_CONTAINER) 2>/dev/null || true
	@docker rm $(DEBUG_CONTAINER) 2>/dev/null || true
	docker run -d --name $(DEBUG_CONTAINER) \
		-p $(DEBUG_PORT):8080 \
		-v $(DEBUG_VOLUME):/app/data \
		-e LOG_LEVEL=DEBUG \
		$(IMAGE_NAME)
	@sleep 2
	docker exec $(DEBUG_CONTAINER) bash /app/run_all.sh --max-pages 1 --max-properties 10
	@echo "Debug app at http://localhost:$(DEBUG_PORT)/mapa_bytu.html"
	@echo "Debug status at http://localhost:$(DEBUG_PORT)/scrapers-status"

debug-stop:
	@docker stop $(DEBUG_CONTAINER) 2>/dev/null || true
	@docker rm $(DEBUG_CONTAINER) 2>/dev/null || true
	@echo "Debug container stopped and removed"

serve:
	DATA_DIR=. SERVER_PORT=$(SERVER_PORT) python3 server.py

validation-local:
	./run_all.sh --max-pages 1 --max-properties 10

validation-local-debug:
	./run_all.sh --max-pages 1 --max-properties 10 --log-level DEBUG

restart: stop run

clean: stop
	docker rmi $(IMAGE_NAME)

README.md (new file)

@@ -0,0 +1,239 @@
# Maru hleda byt
Apartment search aggregator for Prague. Scrapes listings from 6 Czech real estate portals, filters them by configurable criteria, deduplicates across sources, and generates a single interactive map with all matching apartments.
Built for a specific use case: finding a 3+kk or larger apartment in Prague, excluding panel construction ("panelak") and housing estates ("sidliste"), with personal rating support.
## How it works
```
┌─────────────────────────────────────────────────────────────┐
│ run_all.sh │
│ Orchestrates all scrapers, then merges results into map │
├─────────┬──────────┬──────────┬────────┬────────┬───────────┤
│Sreality │Realingo │Bezreal. │iDNES │PSN │CityHome │
│ (API) │ (HTML) │ (HTML) │ (HTML) │ (HTML) │ (HTML) │
├─────────┴──────────┴──────────┴────────┴────────┴───────────┤
│ merge_and_map.py │
│ Loads all byty_*.json, deduplicates, generates HTML map │
├─────────────────────────────────────────────────────────────┤
│ mapa_bytu.html │
│ Interactive Leaflet.js map with filters & ratings │
└─────────────────────────────────────────────────────────────┘
```
### Pipeline
1. **Scraping** -- Each scraper independently fetches listings from its portal, applies filters, and saves results to a JSON file (`byty_<source>.json`).
2. **Merging** -- `merge_and_map.py` loads all 6 JSON files, deduplicates listings (by street name + price + area), and generates the final `mapa_bytu.html`.
3. **Serving** -- The HTML map can be opened locally as a file, or served via Docker with a built-in HTTP server.
### Execution order in `run_all.sh`
Scrapers run sequentially (to avoid overwhelming any single portal), except PSN and CityHome which run in parallel (different sites). If a scraper fails, it is logged but does not abort the pipeline -- remaining scrapers continue.
```
1. scrape_and_map.py (Sreality)
2. scrape_realingo.py (Realingo)
3. scrape_bezrealitky.py (Bezrealitky)
4. scrape_idnes.py (iDNES Reality)
5. scrape_psn.py + scrape_cityhome.py (parallel)
6. merge_and_map.py (merge + map generation)
```
## Scrapers
All scrapers share the same CLI interface and a consistent two-phase approach:
1. **Phase 1** -- Fetch listing pages (paginated) to get a list of all available apartments.
2. **Phase 2** -- Fetch detail pages for each listing to get floor, construction type, and other data needed for filtering.
Each scraper uses a **JSON file cache**: if a listing's `hash_id` and `price` haven't changed since the last run, the cached data is reused and the detail page is not re-fetched. This significantly reduces runtime on subsequent runs.
### Source details
| Scraper | Portal | Data source | Output file | Notes |
|---------|--------|-------------|-------------|-------|
| `scrape_and_map.py` | [Sreality.cz](https://sreality.cz) | REST API (JSON) | `byty_sreality.json` | Main scraper. Also contains `generate_map()` used by all other scripts. |
| `scrape_realingo.py` | [Realingo.cz](https://realingo.cz) | `__NEXT_DATA__` JSON in HTML | `byty_realingo.json` | Next.js app, data extracted from server-side props. |
| `scrape_bezrealitky.py` | [Bezrealitky.cz](https://bezrealitky.cz) | `__NEXT_DATA__` Apollo cache in HTML | `byty_bezrealitky.json` | Next.js app with Apollo GraphQL cache in page source. |
| `scrape_idnes.py` | [Reality iDNES](https://reality.idnes.cz) | HTML parsing (regex) | `byty_idnes.json` | Traditional HTML site. GPS extracted from `dataLayer.push()`. Retry logic with 5 attempts and exponential backoff. |
| `scrape_psn.py` | [PSN.cz](https://psn.cz) | RSC (React Server Components) escaped JSON in HTML | `byty_psn.json` | Uses `curl` instead of `urllib` due to Cloudflare SSL issues. Hardcoded list of Prague projects with GPS coordinates. |
| `scrape_cityhome.py` | [city-home.cz](https://city-home.cz) | HTML table parsing (data attributes on `<tr>`) | `byty_cityhome.json` | CityHome/SATPO developer projects. GPS fetched from project locality pages. |
### Scraper filter criteria
All scrapers apply the same core filters (with minor per-source variations):
| Filter | Value | Notes |
|--------|-------|-------|
| **Max price** | 13 500 000 CZK | PSN and CityHome use 14 000 000 CZK |
| **Min area** | 69 m² | |
| **Min floor** | 2. NP (2nd floor) | 2nd floor apartments are included but flagged on the map |
| **Dispositions** | 3+kk, 3+1, 4+kk, 4+1, 5+kk, 5+1, 6+ | |
| **Region** | Praha | |
| **Construction** | Excludes panel ("panelak") | |
| **Location** | Excludes housing estates ("sidliste") | |
## Utility scripts
### `merge_and_map.py`
Merges all `byty_*.json` files into `byty_merged.json` and generates `mapa_bytu.html`.
**Deduplication logic:** Two listings are considered duplicates if they share the same normalized street name + price + area. PSN and CityHome have priority during dedup (loaded first), so their listings are kept over duplicates from other portals.
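The dedup key above can be sketched as follows; `dedup_key` is an illustrative helper, with diacritics stripped via `unicodedata.normalize()` as introduced in the reliability commit:

```python
import unicodedata

def dedup_key(listing):
    """Normalized street name + price + area, the cross-source dedup key.

    Sketch only; the helper name and locality parsing are illustrative.
    """
    street = listing["locality"].split(",")[0].strip().lower()
    # Strip Czech diacritics: NFD-decompose, then drop combining marks
    street = "".join(c for c in unicodedata.normalize("NFD", street)
                     if not unicodedata.combining(c))
    return (street, listing["price"], listing["area"])
```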
## Interactive map (`mapa_bytu.html`)
The generated map is a standalone HTML file using Leaflet.js with CARTO basemap tiles. Features:
- **Color-coded markers** by disposition (3+kk = blue, 3+1 = green, 4+kk = orange, etc.)
- **Heart-shaped markers** for PSN and CityHome listings (developer favorites)
- **Source badge** in each popup (Sreality, Realingo, Bezrealitky, iDNES, PSN, CityHome)
- **Client-side filters:** minimum floor, maximum price, hide rejected
- **Rating system** (persisted in `localStorage`):
- Star -- mark as favorite (enlarged marker with pulsing glow)
- Reject -- dim the marker, optionally hide it
- Notes -- free-text notes per listing
- **2nd floor warning** -- listings on 2. NP show an orange warning in the popup
- **Statistics panel** -- total count, price range, average price, disposition breakdown
## CLI arguments
All scrapers accept the same arguments. When run via `run_all.sh`, these arguments are forwarded to every scraper.
```
--max-pages N Maximum number of listing pages to scrape per source.
Limits the breadth of the initial listing fetch.
(For PSN: max pages per project)
--max-properties N Maximum number of properties to fetch details for per source.
Limits the depth of the detail-fetching phase.
--log-level LEVEL Logging verbosity. One of: DEBUG, INFO, WARNING, ERROR.
Default: INFO.
DEBUG shows HTTP request/response details, filter decisions
for every single listing, and cache hit/miss info.
-h, --help Show help message (run_all.sh only).
```
### Examples
```bash
# Full scrape (all pages, all properties)
./run_all.sh
# Quick validation run (1 page per source, max 10 properties each)
./run_all.sh --max-pages 1 --max-properties 10
# Full scrape with debug logging
./run_all.sh --log-level DEBUG
# Run a single scraper
python3 scrape_bezrealitky.py --max-pages 2 --max-properties 5 --log-level DEBUG
```
## Running with Docker
The project includes a Docker setup for unattended operation with a cron-based schedule.
### Container architecture
```
┌─────────────────────────────────────────┐
│ Container (python:3.13-alpine) │
│ │
│ PID 1: python3 -m http.server :8080 │
│ serves /app/data/ │
│ │
│ crond: runs run_all.sh every 4 hours │
│ Europe/Prague timezone │
│ │
│ /app/ -- scripts (.py, .sh) │
│ /app/data/ -- volume (JSON + HTML) │
│ ^ symlinked from /app/byty_* │
└─────────────────────────────────────────┘
```
On startup, the HTTP server starts immediately. The initial scrape runs in the background. Subsequent cron runs update data in-place every 4 hours.
### Quick start
```bash
make run # Build image + start container on port 8080
# Map available at http://localhost:8080/mapa_bytu.html
```
### Makefile targets
| Target | Description |
|--------|-------------|
| `make help` | Show all available targets |
| `make build` | Build the Docker image |
| `make run` | Build and run the container (port 8080) |
| `make stop` | Stop and remove the container |
| `make logs` | Tail container logs |
| `make scrape` | Trigger a manual scrape inside the running container |
| `make restart` | Stop and re-run the container |
| `make clean` | Stop container and remove the Docker image |
| `make validation` | Run a limited scrape in a separate Docker container (port 8081) |
| `make validation-stop` | Stop the validation container |
| `make validation-local` | Run a limited scrape locally (1 page, 10 properties) |
| `make validation-local-debug` | Same as above with `--log-level DEBUG` |
### Validation mode
Validation targets run scrapers with `--max-pages 1 --max-properties 10` for a fast smoke test (~30 seconds instead of several minutes). The Docker validation target runs on port 8081 in a separate container so it doesn't interfere with production data.
## Project structure
```
.
├── scrape_and_map.py # Sreality scraper + map generator (generate_map())
├── scrape_realingo.py # Realingo scraper
├── scrape_bezrealitky.py # Bezrealitky scraper
├── scrape_idnes.py # iDNES Reality scraper
├── scrape_psn.py # PSN scraper
├── scrape_cityhome.py # CityHome scraper
├── merge_and_map.py # Merge all sources + generate final map
├── run_all.sh # Orchestrator script (runs all scrapers + merge)
├── mapa_bytu.html # Generated interactive map (output)
├── Makefile # Docker management + validation shortcuts
├── build/
│ ├── Dockerfile # Container image definition (python:3.13-alpine)
│ ├── entrypoint.sh # Container entrypoint (HTTP server + cron + initial scrape)
│ ├── crontab # Cron schedule (every 4 hours)
│ └── CONTAINER.md # Container-specific documentation
└── .gitignore # Ignores byty_*.json, __pycache__, .vscode
```
## Dependencies
**None.** All scrapers use only the Python standard library (`urllib`, `json`, `re`, `argparse`, `logging`, `html.parser`). The only external tool required is `curl` (used by `scrape_psn.py` for Cloudflare TLS compatibility).
The Docker image is based on `python:3.13-alpine` with `curl`, `bash`, and `tzdata` added (~70 MB total).
## Caching behavior
Each scraper maintains a JSON file cache (`byty_<source>.json`). On each run:
1. The previous JSON file is loaded and indexed by `hash_id`.
2. For each listing found in the current run, if the `hash_id` exists in cache **and** the price is unchanged, the cached record is reused without fetching the detail page.
3. New or changed listings trigger a detail page fetch.
4. The JSON file is overwritten with the fresh results at the end.
This means the first run is slow (fetches every detail page with rate-limiting delays), but subsequent runs are much faster as they only fetch details for new or changed listings.
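The steps above can be sketched as a small cache lookup; the function names are illustrative, and `fetch_detail` stands in for each scraper's own detail fetcher:

```python
import json
import os

def load_cache(path):
    """Index the previous run's output by hash_id (step 1 above)."""
    if not os.path.exists(path):
        return {}
    with open(path, encoding="utf-8") as f:
        return {item["hash_id"]: item for item in json.load(f)}

def resolve(listing, cache, fetch_detail):
    """Reuse the cached record when hash_id and price both match (step 2);
    otherwise fetch the detail page (step 3)."""
    cached = cache.get(listing["hash_id"])
    if cached and cached["price"] == listing["price"]:
        return cached  # cache hit: no detail-page request
    return fetch_detail(listing)
```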
## Rate limiting
Each scraper includes polite delays between requests:
| Scraper | Delay between requests |
|---------|----------------------|
| Sreality | 0.3s (details), 0.5s (pages) |
| Realingo | 0.3s (details), 0.5s (pages) |
| Bezrealitky | 0.4s (details), 0.5s (pages) |
| iDNES | 0.4s (details), 1.0s (pages) + retry backoff (3/6/9/12s) |
| PSN | 0.5s (per project page) |
| CityHome | 0.5s (per project GPS fetch) |

build/.dockerignore (new file)

@@ -0,0 +1,5 @@
.git
mapa_bytu.html
byty_*.json
*.pyc
__pycache__

build/CONTAINER.md (new file)

@@ -0,0 +1,100 @@
# Container Setup
OCI container image for the apartment finder. Runs two processes:
1. **Web server** (`python3 -m http.server`) serving `mapa_bytu.html` on port 8080
2. **Cron job** running `run_all.sh` (all 6 scrapers + merge) every 4 hours
## Architecture
```
┌─────────────────────────────────────────┐
│ Container (python:3.13-alpine) │
│ │
│ PID 1: python3 -m http.server :8080 │
│ serves /app/data/ │
│ │
│ crond: runs run_all.sh every 4 hours │
│ Europe/Prague timezone │
│ │
│ /app/ ← scripts (.py, .sh) │
│ /app/data/ ← volume (JSON + HTML) │
│ ↑ symlinked from /app/byty_* │
└─────────────────────────────────────────┘
```
On startup, the web server starts immediately. The initial scrape runs in the background and populates data as it completes. Subsequent cron runs update the data in-place.
## Build and Run
```bash
# Build the image
docker build -t maru-hleda-byt .
# Run with persistent data volume
docker run -d --name maru-hleda-byt \
-p 8080:8080 \
-v maru-hleda-byt-data:/app/data \
--restart unless-stopped \
maru-hleda-byt
```
Access the map at **http://localhost:8080/mapa_bytu.html**
## Volume Persistence
A named volume `maru-hleda-byt-data` stores:
- `byty_*.json` — cached scraper data (6 source files + 1 merged)
- `mapa_bytu.html` — the generated interactive map
The JSON cache is important: each scraper skips re-fetching properties that haven't changed. Without the volume, every container restart triggers a full re-scrape of all 6 portals (several minutes with rate limiting).
## Cron Schedule
Scrapers run **every 4 hours** (Europe/Prague time, CET/CEST).
Cron output is forwarded to the container's stdout/stderr, visible via `docker logs`.
## Operations
```bash
# View logs (including cron and scraper output)
docker logs -f maru-hleda-byt
# Check cron schedule
docker exec maru-hleda-byt crontab -l
# Trigger a manual scrape
docker exec maru-hleda-byt bash /app/run_all.sh
# Stop / start (data persists in volume)
docker stop maru-hleda-byt
docker start maru-hleda-byt
# Rebuild after code changes
docker stop maru-hleda-byt && docker rm maru-hleda-byt
docker build -t maru-hleda-byt .
docker run -d --name maru-hleda-byt \
-p 8080:8080 \
-v maru-hleda-byt-data:/app/data \
--restart unless-stopped \
maru-hleda-byt
```
## Troubleshooting
**Map shows 404**: The initial background scrape hasn't finished yet. Check `docker logs` for progress. First run takes a few minutes due to rate-limited API calls.
**SSL errors from PSN scraper**: `scrape_psn.py` uses `curl` (not Python `urllib`) specifically for Cloudflare SSL compatibility. Alpine's curl includes modern TLS via OpenSSL, so this should work. If not, check that `ca-certificates` is installed (`apk add ca-certificates`).
**Health check failing**: The health check has a 5-minute start period to allow the initial scrape to complete. If it still fails, verify the HTTP server is running: `docker exec maru-hleda-byt wget -q -O /dev/null http://localhost:8080/`.
**Timezone verification**: `docker exec maru-hleda-byt date` should show Czech time.
## Image Details
- **Base**: `python:3.13-alpine` (~55 MB)
- **Added packages**: `curl`, `bash`, `tzdata` (~10 MB)
- **No pip packages** — all scrapers use Python standard library only
- **Approximate image size**: ~70 MB

build/Dockerfile (new file)

@@ -0,0 +1,28 @@
FROM python:3.13-alpine
RUN apk add --no-cache curl bash tzdata \
&& cp /usr/share/zoneinfo/Europe/Prague /etc/localtime \
&& echo "Europe/Prague" > /etc/timezone
ENV PYTHONUNBUFFERED=1
ENV DATA_DIR=/app/data
WORKDIR /app
COPY scrape_and_map.py scrape_realingo.py scrape_bezrealitky.py \
scrape_idnes.py scrape_psn.py scrape_cityhome.py \
merge_and_map.py generate_status.py scraper_stats.py \
run_all.sh server.py ./
COPY build/crontab /etc/crontabs/root
COPY build/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh run_all.sh
RUN mkdir -p /app/data
EXPOSE 8080
HEALTHCHECK --interval=60s --timeout=5s --start-period=300s \
CMD wget -q -O /dev/null http://localhost:8080/ || exit 1
ENTRYPOINT ["/entrypoint.sh"]

build/Makefile (new file)

@@ -0,0 +1,31 @@
IMAGE_NAME := maru-hleda-byt
CONTAINER_NAME := maru-hleda-byt
VOLUME_NAME := maru-hleda-byt-data
PORT := 8080
.PHONY: build run stop logs scrape restart clean
build:
	docker build -f build/Dockerfile -t $(IMAGE_NAME) .

run: build
	docker run -d --name $(CONTAINER_NAME) \
		-p $(PORT):8080 \
		-v $(VOLUME_NAME):/app/data \
		--restart unless-stopped \
		$(IMAGE_NAME)
	@echo "Map will be at http://localhost:$(PORT)/mapa_bytu.html"

stop:
	docker stop $(CONTAINER_NAME) && docker rm $(CONTAINER_NAME)

logs:
	docker logs -f $(CONTAINER_NAME)

scrape:
	docker exec $(CONTAINER_NAME) bash /app/run_all.sh

restart: stop run

clean: stop
	docker rmi $(IMAGE_NAME)

build/crontab (new file)

@@ -0,0 +1 @@
0 */4 * * * cd /app && bash /app/run_all.sh >> /proc/1/fd/1 2>> /proc/1/fd/2

build/entrypoint.sh (new file)

@@ -0,0 +1,22 @@
#!/bin/bash
set -euo pipefail
export DATA_DIR="/app/data"
# Create symlinks so scripts (which write to /app/) persist data to the volume
for f in byty_sreality.json byty_realingo.json byty_bezrealitky.json \
         byty_idnes.json byty_psn.json byty_cityhome.json byty_merged.json \
         mapa_bytu.html ratings.json; do
    # Remove real file if it exists (e.g. baked into image); use an if so
    # the check cannot trip `set -e` when the file is absent
    if [ -f "/app/$f" ] && [ ! -L "/app/$f" ]; then
        rm -f "/app/$f"
    fi
    ln -sf "$DATA_DIR/$f" "/app/$f"
done
echo "[entrypoint] Starting crond..."
crond -b -l 2
echo "[entrypoint] Starting initial scrape in background..."
bash /app/run_all.sh &
echo "[entrypoint] Starting server on port 8080..."
exec python3 /app/server.py
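The symlink loop can be mirrored in Python to see the state it produces (a sketch with temporary directories standing in for `/app` and `/app/data`; the real logic is the shell loop above):

```python
import os
import tempfile

# Sketch of the entrypoint's symlink setup: files baked into the image
# are replaced by symlinks into the persistent data volume.
data_dir = tempfile.mkdtemp()   # stands in for /app/data
app_dir = tempfile.mkdtemp()    # stands in for /app

files = ["byty_merged.json", "mapa_bytu.html", "ratings.json"]
for name in files:
    link = os.path.join(app_dir, name)
    # [ -f "$f" ] && [ ! -L "$f" ] && rm -f "$f"  (drop real baked-in file)
    if os.path.isfile(link) and not os.path.islink(link):
        os.remove(link)
    # ln -sf (force-replace any existing link)
    if os.path.lexists(link):
        os.remove(link)
    os.symlink(os.path.join(data_dir, name), link)

# Every file in the app dir is now a link pointing into the data dir
assert all(os.path.islink(os.path.join(app_dir, n)) for n in files)
```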

byty_bezrealitky.json (deleted)

@@ -1,427 +0,0 @@
[
{
"hash_id": 990183,
"name": "Prodej bytu 3+kk 86 m²",
"price": 10385000,
"price_formatted": "10 385 000 Kč",
"locality": "Ke Tvrzi, Praha - Královice",
"lat": 50.0390519,
"lon": 14.63862,
"disposition": "3+kk",
"floor": 2,
"area": 86,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/990183-nabidka-prodej-bytu-ke-tvrzi-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 989862,
"name": "Prodej bytu 3+kk 73 m²",
"price": 12790000,
"price_formatted": "12 790 000 Kč",
"locality": "Vrázova, Praha - Smíchov",
"lat": 50.0711312,
"lon": 14.4076652,
"disposition": "3+kk",
"floor": 3,
"area": 73,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/989862-nabidka-prodej-bytu-vrazova-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 981278,
"name": "Prodej bytu 3+kk 70 m²",
"price": 11890000,
"price_formatted": "11 890 000 Kč",
"locality": "Argentinská, Praha - Holešovice",
"lat": 50.1026043,
"lon": 14.4435365,
"disposition": "3+kk",
"floor": 3,
"area": 70,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/981278-nabidka-prodej-bytu-argentinska-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 989817,
"name": "Prodej bytu 3+kk 88 m²",
"price": 13490000,
"price_formatted": "13 490 000 Kč",
"locality": "Miroslava Hajna, Praha - Letňany",
"lat": 50.1406487,
"lon": 14.5207541,
"disposition": "3+kk",
"floor": 2,
"area": 88,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/989817-nabidka-prodej-bytu-miroslava-hajna-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 970257,
"name": "Prodej bytu 3+1 106 m²",
"price": 12950000,
"price_formatted": "12 950 000 Kč",
"locality": "Novákových, Praha - Libeň",
"lat": 50.1034771,
"lon": 14.4758735,
"disposition": "3+1",
"floor": 5,
"area": 106,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/970257-nabidka-prodej-bytu-novakovych-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 972406,
"name": "Prodej bytu 3+kk 83 m²",
"price": 10490000,
"price_formatted": "10 490 000 Kč",
"locality": "Na Výrovně, Praha - Stodůlky",
"lat": 50.0396067,
"lon": 14.3167022,
"disposition": "3+kk",
"floor": 2,
"area": 83,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/972406-nabidka-prodej-bytu-na-vyrovne",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 967142,
"name": "Prodej bytu 3+kk 78 m²",
"price": 11648000,
"price_formatted": "11 648 000 Kč",
"locality": "Na Míčánkách, Praha - Vršovice",
"lat": 50.0713284,
"lon": 14.4638722,
"disposition": "3+kk",
"floor": 6,
"area": 78,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/967142-nabidka-prodej-bytu-na-micankach",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 955977,
"name": "Prodej bytu 4+kk 75 m²",
"price": 10363000,
"price_formatted": "10 363 000 Kč",
"locality": "Karla Guta, Praha - Uhříněves",
"lat": 50.03017,
"lon": 14.5940072,
"disposition": "4+kk",
"floor": 4,
"area": 75,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/955977-nabidka-prodej-bytu-karla-guta",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 974557,
"name": "Prodej bytu 4+kk 94 m²",
"price": 13499900,
"price_formatted": "13 499 900 Kč",
"locality": "V Dolině, Praha - Michle",
"lat": 50.0579963,
"lon": 14.4682887,
"disposition": "4+kk",
"floor": 8,
"area": 94,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/974557-nabidka-prodej-bytu-v-doline-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 988498,
"name": "Prodej bytu 3+1 75 m²",
"price": 11400000,
"price_formatted": "11 400 000 Kč",
"locality": "5. května, Praha - Nusle",
"lat": 50.0604096,
"lon": 14.4326302,
"disposition": "3+1",
"floor": 4,
"area": 75,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/988498-nabidka-prodej-bytu-5-kvetna-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 985285,
"name": "Prodej bytu 3+kk 70 m²",
"price": 12200000,
"price_formatted": "12 200 000 Kč",
"locality": "Klausova, Praha - Stodůlky",
"lat": 50.0370204,
"lon": 14.3432643,
"disposition": "3+kk",
"floor": 5,
"area": 70,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/985285-nabidka-prodej-bytu-klausova-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 965526,
"name": "Prodej bytu 3+kk 77 m²",
"price": 11890000,
"price_formatted": "11 890 000 Kč",
"locality": "Vinohradská, Praha - Strašnice",
"lat": 50.0776726,
"lon": 14.4870072,
"disposition": "3+kk",
"floor": 16,
"area": 77,
"building_type": "Smíšená",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/965526-nabidka-prodej-bytu-vinohradska-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 924811,
"name": "Prodej bytu 3+kk 75 m²",
"price": 13390000,
"price_formatted": "13 390 000 Kč",
"locality": "Waltariho, Praha - Hloubětín",
"lat": 50.1076717,
"lon": 14.5248559,
"disposition": "3+kk",
"floor": 4,
"area": 75,
"building_type": "Smíšená",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/924811-nabidka-prodej-bytu-waltariho-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 985859,
"name": "Prodej bytu 3+1 80 m²",
"price": 9000000,
"price_formatted": "9 000 000 Kč",
"locality": "Staňkova, Praha - Háje",
"lat": 50.0377128,
"lon": 14.5311557,
"disposition": "3+1",
"floor": 2,
"area": 80,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/985859-nabidka-prodej-bytu-stankova-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 985583,
"name": "Prodej bytu 3+kk 76 m²",
"price": 10850000,
"price_formatted": "10 850 000 Kč",
"locality": "Boloňská, Praha - Horní Měcholupy",
"lat": 50.047328,
"lon": 14.5565277,
"disposition": "3+kk",
"floor": 4,
"area": 76,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/985583-nabidka-prodej-bytu-bolonska-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 981178,
"name": "Prodej bytu 4+kk 86 m²",
"price": 11990000,
"price_formatted": "11 990 000 Kč",
"locality": "Sušilova, Praha - Uhříněves",
"lat": 50.032081,
"lon": 14.5885148,
"disposition": "4+kk",
"floor": 2,
"area": 86,
"building_type": "SKELET",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/981178-nabidka-prodej-bytu-susilova-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 973216,
"name": "Prodej bytu 4+1 82 m²",
"price": 11357000,
"price_formatted": "11 357 000 Kč",
"locality": "Nad Kapličkou, Praha - Strašnice",
"lat": 50.0839509,
"lon": 14.4904493,
"disposition": "4+1",
"floor": 2,
"area": 82,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/973216-nabidka-prodej-bytu-nad-kaplickou-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 868801,
"name": "Prodej bytu 3+kk 109 m²",
"price": 7299000,
"price_formatted": "7 299 000 Kč",
"locality": "Pod Karlovem, Praha - Vinohrady",
"lat": 50.0676313,
"lon": 14.432498,
"disposition": "3+kk",
"floor": 5,
"area": 109,
"building_type": "Cihlová",
"ownership": "Družstevní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/868801-nabidka-prodej-bytu-pod-karlovem-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 868795,
"name": "Prodej bytu 3+kk 106 m²",
"price": 6299000,
"price_formatted": "6 299 000 Kč",
"locality": "Pod Karlovem, Praha - Vinohrady",
"lat": 50.0676313,
"lon": 14.432498,
"disposition": "3+kk",
"floor": 2,
"area": 106,
"building_type": "Cihlová",
"ownership": "Družstevní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/868795-nabidka-prodej-bytu-pod-karlovem-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 981890,
"name": "Prodej bytu 3+1 84 m²",
"price": 12980000,
"price_formatted": "12 980 000 Kč",
"locality": "Novákových, Praha - Libeň",
"lat": 50.103273,
"lon": 14.4746894,
"disposition": "3+1",
"floor": 2,
"area": 84,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/981890-nabidka-prodej-bytu-novakovych-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 976276,
"name": "Prodej bytu 3+kk 75 m²",
"price": 13490000,
"price_formatted": "13 490 000 Kč",
"locality": "Svornosti, Praha - Smíchov",
"lat": 50.0673284,
"lon": 14.4095087,
"disposition": "3+kk",
"floor": 2,
"area": 75,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/976276-nabidka-prodej-bytu-svornosti-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 950787,
"name": "Prodej bytu 3+kk 70 m²",
"price": 9999000,
"price_formatted": "9 999 000 Kč",
"locality": "Sečská, Praha - Strašnice",
"lat": 50.071191,
"lon": 14.5035501,
"disposition": "3+kk",
"floor": 3,
"area": 70,
"building_type": "Smíšená",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/950787-nabidka-prodej-bytu-secska-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 978045,
"name": "Prodej bytu 3+kk 76 m²",
"price": 11133000,
"price_formatted": "11 133 000 Kč",
"locality": "K Vinoři, Praha - Kbely",
"lat": 50.1329656,
"lon": 14.5618499,
"disposition": "3+kk",
"floor": 2,
"area": 76,
"building_type": "Smíšená",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/978045-nabidka-prodej-bytu-k-vinori",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 974552,
"name": "Prodej bytu 3+1 75 m²",
"price": 11000000,
"price_formatted": "11 000 000 Kč",
"locality": "Vejražkova, Praha - Košíře",
"lat": 50.0637808,
"lon": 14.3612275,
"disposition": "3+1",
"floor": 2,
"area": 75,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/974552-nabidka-prodej-bytu-vejrazkova-praha",
"source": "bezrealitky",
"image": ""
},
{
"hash_id": 955010,
"name": "Prodej bytu 3+kk 70 m²",
"price": 12290000,
"price_formatted": "12 290 000 Kč",
"locality": "Břeclavská, Praha - Kyje",
"lat": 50.0951045,
"lon": 14.5454237,
"disposition": "3+kk",
"floor": 2,
"area": 70,
"building_type": "Cihlová",
"ownership": "Osobní",
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/955010-nabidka-prodej-bytu-breclavska-hlavni-mesto-praha",
"source": "bezrealitky",
"image": ""
}
]

(deleted file)

@@ -1 +0,0 @@
[]

(two large file diffs suppressed)

(deleted file)

@@ -1 +0,0 @@
[]

(two large file diffs suppressed)

docs/validation.md (new file, 123 lines)

@@ -0,0 +1,123 @@
# Validation Recipe
End-to-end check that scraping, data persistence, history, and the status page all work correctly in Docker.
## What it verifies
- All scrapers run and write output to `DATA_DIR` (`/app/data`)
- `stats_*.json` land in `/app/data/` (not in `/app/`)
- `status.json` and `scraper_history.json` land in `/app/data/`
- `/api/status`, `/api/status/history`, and `/scrapers-status` serve correct data
- History accumulates across runs
## Steps
### 1. Build the image
```bash
make build
```
### 2. Start a clean validation container
```bash
# Stop/remove any leftover container and volume from a previous run
docker stop maru-hleda-byt-validation 2>/dev/null; docker rm maru-hleda-byt-validation 2>/dev/null
docker volume rm maru-hleda-byt-validation-data 2>/dev/null
docker run -d --name maru-hleda-byt-validation \
-p 8081:8080 \
-v maru-hleda-byt-validation-data:/app/data \
maru-hleda-byt
```
Give the container ~3 seconds to start. The entrypoint launches a background full scrape automatically — suppress it so only controlled runs execute:
```bash
sleep 3
docker exec maru-hleda-byt-validation pkill -f run_all.sh 2>/dev/null || true
docker exec maru-hleda-byt-validation rm -f /app/data/scraper_running.json 2>/dev/null || true
```
### 3. Run a limited scrape (run 1)
```bash
docker exec maru-hleda-byt-validation bash /app/run_all.sh --max-pages 1 --max-properties 10
```
Expected output (last few lines):
```
Status uložen: /app/data/status.json
Historie uložena: /app/data/scraper_history.json (1 záznamů)
```
### 4. Verify data files are in `/app/data/`
```bash
docker exec maru-hleda-byt-validation ls /app/data/
```
Expected files:
```
byty_cityhome.json byty_idnes.json byty_merged.json
byty_realingo.json byty_sreality.json
mapa_bytu.html
scraper_history.json
stats_bezrealitky.json stats_cityhome.json stats_idnes.json
stats_realingo.json stats_sreality.json
status.json
```
### 5. Run a second limited scrape (run 2)
```bash
docker exec maru-hleda-byt-validation bash /app/run_all.sh --max-pages 1 --max-properties 10
```
Expected last line: `Historie uložena: /app/data/scraper_history.json (2 záznamů)`
### 6. Verify history via API
```bash
curl -s http://localhost:8081/api/status/history | python3 -c "
import json, sys
h = json.load(sys.stdin)
print(f'{len(h)} entries:')
for i, e in enumerate(h):
print(f' [{i}] {e[\"timestamp\"]} total={e[\"total_accepted\"]}')
"
```
Expected: 2 entries with different timestamps.
```bash
curl -s http://localhost:8081/api/status | python3 -c "
import json, sys; s=json.load(sys.stdin)
print(f'status={s[\"status\"]} total={s[\"total_accepted\"]} ts={s[\"timestamp\"]}')
"
```
Expected: `status=done total=<N> ts=<latest timestamp>`
### 7. Check the status page
Open http://localhost:8081/scrapers-status in a browser (or `curl -s http://localhost:8081/scrapers-status | grep -c "clickable-row"` — should print `2`).
### 8. Clean up
```bash
docker stop maru-hleda-byt-validation && docker rm maru-hleda-byt-validation
docker volume rm maru-hleda-byt-validation-data
```
Or use the Makefile shortcut:
```bash
make validation-stop
```
## Notes
- PSN scraper does not support `--max-pages` and will always fail with this command; `success=False` in history is expected during validation.
- Bezrealitky may return 0 results with a 1-page limit; `byty_bezrealitky.json` will be absent from `/app/data/` in that case — this is normal.
- `make validation` (the Makefile target) runs the same limited scrape but does not suppress the background startup scrape, so two concurrent runs may occur. Use the manual steps above for a clean controlled test.

generate_status.py (new file, 161 lines)

@@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""Generate status.json from scraper JSON outputs and per-scraper stats files."""
from __future__ import annotations
import argparse
import json
import os
from datetime import datetime
from pathlib import Path
HERE = Path(__file__).parent
DATA_DIR = Path(os.environ.get("DATA_DIR", HERE))
SOURCE_FILES = {
"Sreality": "byty_sreality.json",
"Realingo": "byty_realingo.json",
"Bezrealitky": "byty_bezrealitky.json",
"iDNES": "byty_idnes.json",
"PSN": "byty_psn.json",
"CityHome": "byty_cityhome.json",
}
STATS_FILES = {
"Sreality": "stats_sreality.json",
"Realingo": "stats_realingo.json",
"Bezrealitky": "stats_bezrealitky.json",
"iDNES": "stats_idnes.json",
"PSN": "stats_psn.json",
"CityHome": "stats_cityhome.json",
}
MERGED_FILE = "byty_merged.json"
HISTORY_FILE = "scraper_history.json"
def count_source(path: Path) -> dict:
"""Read a scraper JSON and return accepted count + file mtime."""
if not path.exists():
return {"accepted": 0, "error": "soubor nenalezen"}
try:
data = json.loads(path.read_text(encoding="utf-8"))
mtime = datetime.fromtimestamp(path.stat().st_mtime).isoformat(timespec="seconds")
return {"accepted": len(data), "updated_at": mtime}
except Exception as e:
return {"accepted": 0, "error": str(e)}
def read_scraper_stats(path: Path) -> dict:
"""Load a per-scraper stats JSON. Returns {} on missing or corrupt file."""
if not path.exists():
return {}
try:
data = json.loads(path.read_text(encoding="utf-8"))
return data if isinstance(data, dict) else {}
except Exception:
return {}
def append_to_history(status: dict, keep: int) -> None:
"""Append the current status entry to scraper_history.json, keeping only `keep` latest."""
history_path = DATA_DIR / HISTORY_FILE
history: list = []
if history_path.exists():
try:
history = json.loads(history_path.read_text(encoding="utf-8"))
if not isinstance(history, list):
history = []
except Exception:
history = []
history.append(status)
# Keep only the N most recent entries
if keep > 0 and len(history) > keep:
history = history[-keep:]
history_path.write_text(json.dumps(history, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"Historie uložena: {history_path} ({len(history)} záznamů)")
def main():
parser = argparse.ArgumentParser(description="Generate status.json from scraper outputs.")
parser.add_argument("--start-time", dest="start_time", default=None,
help="ISO timestamp of scrape start (default: now)")
parser.add_argument("--duration", dest="duration", type=int, default=None,
help="Run duration in seconds")
parser.add_argument("--keep", dest="keep", type=int, default=20,
help="Number of history entries to keep (default: 20, 0=unlimited)")
args = parser.parse_args()
start_time = args.start_time or datetime.now().isoformat(timespec="seconds")
duration_sec = args.duration
sources = []
for name, filename in SOURCE_FILES.items():
path = HERE / filename
info = count_source(path)
info["name"] = name
# Merge in stats from the per-scraper stats file (authoritative for run data)
stats = read_scraper_stats(DATA_DIR / STATS_FILES[name])
for key in ("accepted", "fetched", "pages", "cache_hits", "excluded", "excluded_total",
"success", "duration_sec", "error"):
if key in stats:
info[key] = stats[key]
sources.append(info)
# Total accepted before dedup
total_accepted = sum(s.get("accepted", 0) for s in sources)
# Merged / deduplicated count
merged_path = HERE / MERGED_FILE
deduplicated = 0
if merged_path.exists():
try:
merged = json.loads(merged_path.read_text(encoding="utf-8"))
deduplicated = len(merged)
except Exception:
pass
duplicates_removed = total_accepted - deduplicated if deduplicated else 0
# Top-level success: True if no source has an error
success = not any("error" in s for s in sources)
status = {
"status": "done",
"timestamp": start_time,
"duration_sec": duration_sec,
"success": success,
"total_accepted": total_accepted,
"deduplicated": deduplicated,
"duplicates_removed": duplicates_removed,
"sources": sources,
}
out = DATA_DIR / "status.json"
out.write_text(json.dumps(status, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"Status uložen: {out}")
print(f" Celkem bytů (před dedup): {total_accepted}")
print(f" Po deduplikaci: {deduplicated}")
if duplicates_removed:
print(f" Odstraněno duplikátů: {duplicates_removed}")
for s in sources:
acc = s.get("accepted", 0)
err = s.get("error", "")
exc = s.get("excluded", {})
exc_total = sum(exc.values()) if exc else s.get("excluded_total", 0)
parts = [f"{s['name']:12s}: {acc} bytů"]
if exc_total:
parts.append(f"({exc_total} vyloučeno)")
if err:
parts.append(f"[CHYBA: {err}]")
print(" " + " ".join(parts))
append_to_history(status, args.keep)
if __name__ == "__main__":
main()
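The history-truncation rule in `append_to_history` (keep only the `keep` most recent entries; `0` means unlimited) reduces to a small, checkable function:

```python
# Truncation semantics of append_to_history's `keep` parameter,
# extracted verbatim from the logic above.
def truncate(history: list, keep: int) -> list:
    if keep > 0 and len(history) > keep:
        return history[-keep:]
    return history

print(truncate([1, 2, 3, 4, 5], 3))  # [3, 4, 5]
print(truncate([1, 2], 3))           # [1, 2]
print(truncate([1, 2, 3], 0))        # [1, 2, 3]  (0 = unlimited)
```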

(large file diff suppressed)

merge_and_map.py

@@ -9,6 +9,7 @@ from __future__ import annotations
import json
import re
import unicodedata
from pathlib import Path
from scrape_and_map import generate_map, format_price
@@ -19,14 +20,8 @@ def normalize_street(locality: str) -> str:
# "Studentská, Praha 6 - Dejvice" → "studentska"
# "Rýnská, Praha" → "rynska"
street = locality.split(",")[0].strip().lower()
# Remove diacritics (simple Czech)
replacements = {
"á": "a", "č": "c", "ď": "d", "é": "e", "ě": "e",
"í": "i", "ň": "n", "ó": "o", "ř": "r", "š": "s",
"ť": "t", "ú": "u", "ů": "u", "ý": "y", "ž": "z",
}
for src, dst in replacements.items():
street = street.replace(src, dst)
# Remove diacritics using Unicode decomposition (handles all Czech characters)
street = unicodedata.normalize("NFKD", street).encode("ascii", "ignore").decode("ascii")
# Remove non-alphanumeric
street = re.sub(r"[^a-z0-9]", "", street)
return street
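The `unicodedata.normalize` replacement can be exercised standalone: NFKD decomposes each accented character into its base letter plus a combining mark, and the ASCII encode with `ignore` then drops the mark. A sketch of the resulting pipeline, using localities from the scraped data:

```python
import re
import unicodedata

def strip_street(locality: str) -> str:
    # "Boloňská, Praha - Horní Měcholupy" → "bolonska"
    street = locality.split(",")[0].strip().lower()
    # NFKD splits e.g. "ň" into "n" + combining hacek; ASCII encode drops it
    street = unicodedata.normalize("NFKD", street).encode("ascii", "ignore").decode("ascii")
    return re.sub(r"[^a-z0-9]", "", street)

print(strip_street("Boloňská, Praha - Horní Měcholupy"))  # bolonska
print(strip_street("Rýnská, Praha"))                      # rynska
```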
@@ -79,6 +74,10 @@ def main():
if key in seen_keys:
dupes += 1
existing = seen_keys[key]
# Preserve earliest first_seen across sources
dup_fs = e.get("first_seen", "")
if dup_fs and (not existing.get("first_seen") or dup_fs < existing["first_seen"]):
existing["first_seen"] = dup_fs
# Log it
print(f" Duplikát: {e['locality']} | {format_price(e['price'])} | {e.get('area', '?')}"
f"({e.get('source', '?')} vs {existing.get('source', '?')})")

project/todo.md (new file, 14 lines)

@@ -0,0 +1,14 @@
## Features to add
- testing: run a very limited scrape (1 page, 10 properties) so that we can run validations on it
- makefile: add a target to run locally (with the webserver)
- feature: store the date of the last scrape somewhere, so that we know how fresh the data are
- feature: ?? mark each property with the scrape run in which it first appeared - we might then be able to look at recent diffs only
## Code organization
- prepare a reasonable code structure from the current bunch of "random" files
## Documentation
- precisely document the original intent of the app (Maru has to provide this)

regen_map.py (deleted)

@@ -1,114 +0,0 @@
#!/usr/bin/env python3
"""
Přegeneruje mapu z již stažených dat (byty_sreality.json).
Doplní chybějící plochy ze Sreality API, opraví URL, aplikuje filtry.
"""
from __future__ import annotations
import json
import time
import urllib.request
from pathlib import Path
from scrape_and_map import (
generate_map, format_price, MIN_AREA, HEADERS, DETAIL_API
)
def api_get(url: str) -> dict:
req = urllib.request.Request(url, headers=HEADERS)
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode("utf-8"))
def fix_sreality_url(estate: dict) -> str:
"""Fix the Sreality URL to include disposition segment (only if missing)."""
disp = estate.get("disposition", "")
slug_map = {
"1+kk": "1+kk", "1+1": "1+1", "2+kk": "2+kk", "2+1": "2+1",
"3+kk": "3+kk", "3+1": "3+1", "4+kk": "4+kk", "4+1": "4+1",
"5+kk": "5+kk", "5+1": "5+1", "6+": "6-a-vice", "Atypický": "atypicky",
}
slug = slug_map.get(disp, "byt")
old_url = estate.get("url", "")
parts = old_url.split("/")
try:
byt_idx = parts.index("byt")
# Only insert if disposition slug is not already there
if byt_idx + 1 < len(parts) and parts[byt_idx + 1] == slug:
return old_url # already correct
parts.insert(byt_idx + 1, slug)
return "/".join(parts)
except ValueError:
return old_url
def fetch_area(hash_id: int) -> int | None:
"""Fetch area from detail API."""
try:
url = DETAIL_API.format(hash_id)
detail = api_get(url)
for item in detail.get("items", []):
name = item.get("name", "")
if "žitná ploch" in name or "zitna ploch" in name.lower():
return int(item["value"])
except Exception:
pass
return None
def main():
json_path = Path("byty_sreality.json")
if not json_path.exists():
print("Soubor byty_sreality.json nenalezen. Nejprve spusť scrape_and_map.py")
return
estates = json.loads(json_path.read_text(encoding="utf-8"))
print(f"Načteno {len(estates)} bytů z byty_sreality.json")
# Step 1: Fetch missing areas
missing_area = [e for e in estates if e.get("area") is None]
print(f"Doplňuji plochu u {len(missing_area)} bytů...")
for i, e in enumerate(missing_area):
time.sleep(0.3)
area = fetch_area(e["hash_id"])
if area is not None:
e["area"] = area
if (i + 1) % 50 == 0:
print(f" {i + 1}/{len(missing_area)} ...")
# Count results
with_area = sum(1 for e in estates if e.get("area") is not None)
print(f"Plocha doplněna: {with_area}/{len(estates)}")
# Step 2: Fix URLs
for e in estates:
e["url"] = fix_sreality_url(e)
# Step 3: Filter by min area
filtered = []
excluded = 0
for e in estates:
area = e.get("area")
if area is not None and area < MIN_AREA:
excluded += 1
continue
filtered.append(e)
print(f"Vyloučeno (< {MIN_AREA} m²): {excluded}")
print(f"Zbývá: {len(filtered)} bytů")
# Save updated data
filtered_path = Path("byty_sreality.json")
filtered_path.write_text(
json.dumps(filtered, ensure_ascii=False, indent=2),
encoding="utf-8",
)
# Generate map
generate_map(filtered)
if __name__ == "__main__":
main()

run_all.sh

@@ -2,6 +2,8 @@
# ============================================================
# Spustí všechny scrapery, sloučí data a otevře mapu.
# Použití: ./run_all.sh
# Nebo s limity: ./run_all.sh --max-pages 1 --max-properties 10
# Nebo s logováním: ./run_all.sh --log-level DEBUG
# ============================================================
set -euo pipefail
cd "$(dirname "$0")"
@@ -14,6 +16,59 @@ NC='\033[0m'
TOTAL=6
CURRENT=0
FAILED=0
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
START_EPOCH=$(date +%s)
LOG_FILE="$(pwd)/scrape_run.log"
# Mark scraper as running; cleaned up on exit (even on error/kill)
LOCK_FILE="${DATA_DIR:-.}/scraper_running.json"
echo '{"running":true,"started_at":"'"$START_TIME"'"}' > "$LOCK_FILE"
trap 'rm -f "$LOCK_FILE"' EXIT
show_help() {
echo "Usage: ./run_all.sh [OPTIONS]"
echo ""
echo "Spustí všechny scrapery, sloučí data a otevře mapu."
echo ""
echo "Options:"
echo " --max-pages N Maximální počet stránek ke stažení z každého zdroje"
echo " --max-properties N Maximální počet nemovitostí ke stažení z každého zdroje"
echo " --log-level LEVEL Úroveň logování (DEBUG, INFO, WARNING, ERROR)"
echo " --keep N Počet běhů v historii (výchozí: 5, 0=neomezeno)"
echo " -h, --help Zobrazí tuto nápovědu"
echo ""
echo "Examples:"
echo " ./run_all.sh # plný běh"
echo " ./run_all.sh --max-pages 1 --max-properties 10 # rychlý test"
echo " ./run_all.sh --log-level DEBUG # s debug logováním"
echo " ./run_all.sh --keep 10 # uchovej 10 běhů v historii"
}
# Parse arguments
SCRAPER_ARGS=""
KEEP_ARG=""
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
show_help
exit 0
;;
--max-pages|--max-properties|--log-level)
SCRAPER_ARGS="$SCRAPER_ARGS $1 $2"
shift 2
;;
--keep)
KEEP_ARG="--keep $2"
shift 2
;;
*)
echo "Unknown argument: $1"
echo ""
show_help
exit 1
;;
esac
done
step() {
CURRENT=$((CURRENT + 1))
@@ -23,27 +78,29 @@ step() {
}
# ── Scrapery (paralelně kde to jde) ─────────────────────────
# Tee all output to log file for status generation
exec > >(tee -a "$LOG_FILE") 2>&1
step "Sreality"
python3 scrape_and_map.py || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); }
step "Realingo"
python3 scrape_realingo.py || { echo -e "${RED}✗ Realingo selhalo${NC}"; FAILED=$((FAILED + 1)); }
python3 scrape_and_map.py $SCRAPER_ARGS || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); }
step "Bezrealitky"
python3 scrape_bezrealitky.py || { echo -e "${RED}✗ Bezrealitky selhalo${NC}"; FAILED=$((FAILED + 1)); }
python3 scrape_bezrealitky.py $SCRAPER_ARGS || { echo -e "${RED}✗ Bezrealitky selhalo${NC}"; FAILED=$((FAILED + 1)); }
step "iDNES Reality"
python3 scrape_idnes.py || { echo -e "${RED}✗ iDNES selhalo${NC}"; FAILED=$((FAILED + 1)); }
python3 scrape_idnes.py $SCRAPER_ARGS || { echo -e "${RED}✗ iDNES selhalo${NC}"; FAILED=$((FAILED + 1)); }
step "PSN + CityHome"
python3 scrape_psn.py &
python3 scrape_psn.py $SCRAPER_ARGS &
PID_PSN=$!
python3 scrape_cityhome.py &
python3 scrape_cityhome.py $SCRAPER_ARGS &
PID_CH=$!
wait $PID_PSN || { echo -e "${RED}✗ PSN selhalo${NC}"; FAILED=$((FAILED + 1)); }
wait $PID_CH || { echo -e "${RED}✗ CityHome selhalo${NC}"; FAILED=$((FAILED + 1)); }
step "Realingo"
python3 scrape_realingo.py $SCRAPER_ARGS || { echo -e "${RED}✗ Realingo selhalo${NC}"; FAILED=$((FAILED + 1)); }
# ── Sloučení + mapa ──────────────────────────────────────────
step "Sloučení dat a generování mapy"
@@ -51,6 +108,12 @@ python3 merge_and_map.py || { echo -e "${RED}✗ Merge selhal${NC}"; FAILED=$((F
# ── Otevření mapy ────────────────────────────────────────────
# ── Generování statusu ─────────────────────────────────────
END_EPOCH=$(date +%s)
DURATION=$((END_EPOCH - START_EPOCH))
python3 generate_status.py --start-time "$START_TIME" --duration "$DURATION" $KEEP_ARG
echo ""
echo "============================================================"
if [ $FAILED -eq 0 ]; then
@@ -60,4 +123,4 @@ else
fi
echo "============================================================"
open mapa_bytu.html
command -v open &>/dev/null && open mapa_bytu.html || true
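The `scraper_running.json` lock written at the top of this script can be consumed by a reader such as the status page. The helper below is our illustration of that contract (name and error handling are ours; the repo's server may check it differently):

```python
import json
from pathlib import Path

def scrape_in_progress(data_dir: str = "/app/data") -> bool:
    """Return True while run_all.sh's lock file reports a running scrape."""
    lock = Path(data_dir) / "scraper_running.json"
    if not lock.exists():
        return False
    try:
        return bool(json.loads(lock.read_text()).get("running"))
    except (json.JSONDecodeError, OSError):
        # A corrupt or half-written lock must not report "running" forever
        return False
```

Because the script installs `trap 'rm -f "$LOCK_FILE"' EXIT`, the file disappears even when a run fails, so this check cannot get stuck at `True`.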

scrape_and_map.py

@@ -6,13 +6,20 @@ Hledá byty na prodej v Praze podle zadaných kritérií a generuje HTML mapu.
from __future__ import annotations
import argparse
import json
import logging
import math
import time
import urllib.request
import urllib.parse
from datetime import datetime
from datetime import datetime, timedelta
from pathlib import Path
from scraper_stats import write_stats, validate_listing
STATS_FILE = "stats_sreality.json"
logger = logging.getLogger(__name__)
# ── Konfigurace filtrů ──────────────────────────────────────────────────────
@@ -38,10 +45,26 @@ HEADERS = {
def api_get(url: str) -> dict:
"""Fetch JSON from Sreality API."""
"""Fetch JSON from Sreality API with retry."""
for attempt in range(3):
logger.debug(f"HTTP GET request (attempt {attempt + 1}/3): {url}")
req = urllib.request.Request(url, headers=HEADERS)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode("utf-8"))
response_data = resp.read().decode("utf-8")
logger.debug(f"HTTP response: status={resp.status}, size={len(response_data)} bytes")
logger.debug(f"Response preview: {response_data[:200]}")
return json.loads(response_data)
except urllib.error.HTTPError:
raise
except (urllib.error.URLError, ConnectionError, OSError) as e:
if attempt < 2:
wait = (attempt + 1) * 2
logger.warning(f"Connection error (retry {attempt + 1}/3 after {wait}s): {e}")
time.sleep(wait)
else:
logger.error(f"HTTP request failed after 3 attempts: {e}", exc_info=True)
raise
def build_list_url(disposition: int, page: int = 1) -> str:
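The retry added to `api_get` above reduces to a reusable shape: up to 3 attempts, linear backoff of 2 s then 4 s, and HTTP status errors re-raised immediately because they are not transient. A generic sketch (the `with_retry` name and `base_delay` knob are ours, not the scraper's):

```python
import time
import urllib.error

def with_retry(fn, attempts: int = 3, base_delay: float = 2.0):
    """Call fn(), retrying transient network errors with growing waits."""
    for attempt in range(attempts):
        try:
            return fn()
        except urllib.error.HTTPError:
            raise  # HTTP status errors (4xx/5xx) are not transient; fail fast
        except (urllib.error.URLError, ConnectionError, OSError):
            if attempt == attempts - 1:
                raise
            time.sleep((attempt + 1) * base_delay)  # 2 s, then 4 s
```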
@@ -59,20 +82,26 @@ def build_list_url(disposition: int, page: int = 1) -> str:
return f"{API_BASE}?{urllib.parse.urlencode(params)}"
def fetch_estates_for_disposition(disposition: int) -> list[dict]:
def fetch_estates_for_disposition(disposition: int, max_pages: int | None = None) -> list[dict]:
"""Fetch all estates for a given disposition, handling pagination."""
url = build_list_url(disposition, page=1)
print(f" Fetching disposition {disposition}, page 1 ...")
logger.info(f"Fetching disposition {disposition}, page 1 ...")
data = api_get(url)
total = data.get("result_size", 0)
estates = data.get("_embedded", {}).get("estates", [])
total_pages = math.ceil(total / PER_PAGE) if total > 0 else 0
print(f" {total} results, {total_pages} pages")
logger.info(f"{total} results, {total_pages} pages")
# Limit pages if max_pages is specified
if max_pages is not None:
original_pages = total_pages
total_pages = min(total_pages, max_pages)
logger.debug(f"Max pages limit reached: limiting {original_pages} pages to {total_pages}")
for page in range(2, total_pages + 1):
time.sleep(0.5)
print(f" Fetching page {page}/{total_pages} ...")
logger.info(f"Fetching page {page}/{total_pages} ...")
url = build_list_url(disposition, page=page)
data = api_get(url)
estates.extend(data.get("_embedded", {}).get("estates", []))
@@ -84,9 +113,12 @@ def get_estate_detail(hash_id: int) -> dict | None:
"""Fetch detail for a single estate to get floor info and building type."""
try:
url = DETAIL_API.format(hash_id)
return api_get(url)
logger.debug(f"Fetching detail for hash_id={hash_id}")
detail = api_get(url)
logger.debug(f"Detail fetched for hash_id={hash_id}, keys: {list(detail.keys())[:5]}")
return detail
except Exception as e:
print(f" Warning: Could not fetch detail for {hash_id}: {e}")
logger.warning(f"Could not fetch detail for hash_id={hash_id}: {e}", exc_info=True)
return None
@@ -185,24 +217,30 @@ def load_cache(json_path: str = "byty_sreality.json") -> dict[int, dict]:
return {}
def scrape():
def scrape(max_pages: int | None = None, max_properties: int | None = None):
"""Main scraping function. Returns list of filtered estates."""
_run_start = time.time()
_run_ts = datetime.now().isoformat(timespec="seconds")
all_estates_raw = []
cache = load_cache()
print("=" * 60)
print("Stahuji inzeráty ze Sreality.cz")
print(f"Cena: do {format_price(MAX_PRICE)}")
print(f"Dispozice: {', '.join(disposition_label(d) for d in DISPOSITIONS)}")
print(f"Patro: od {MIN_FLOOR}. NP")
print(f"Region: Praha")
logger.info("=" * 60)
logger.info("Stahuji inzeráty ze Sreality.cz")
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
logger.info(f"Dispozice: {', '.join(disposition_label(d) for d in DISPOSITIONS)}")
logger.info(f"Patro: od {MIN_FLOOR}. NP")
logger.info(f"Region: Praha")
if cache:
print(f"Cache: {len(cache)} bytů z minulého běhu")
print("=" * 60)
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
if max_pages:
logger.info(f"Limit stran: {max_pages}")
if max_properties:
logger.info(f"Limit majetků: {max_properties}")
logger.info("=" * 60)
for disp in DISPOSITIONS:
print(f"\n▸ Dispozice: {disposition_label(disp)}")
estates = fetch_estates_for_disposition(disp)
logger.info(f"\n▸ Dispozice: {disposition_label(disp)}")
estates = fetch_estates_for_disposition(disp, max_pages=max_pages)
for e in estates:
e["_disposition_cb"] = disp
all_estates_raw.extend(estates)
@@ -217,10 +255,10 @@ def scrape():
seen.add(hid)
unique_estates.append(e)
print(f"\n{'=' * 60}")
print(f"Staženo celkem: {len(unique_estates)} unikátních inzerátů")
print(f"Stahuji detaily pro filtrování panelu/sídlišť...")
print(f"{'=' * 60}")
logger.info(f"\n{'=' * 60}")
logger.info(f"Staženo celkem: {len(unique_estates)} unikátních inzerátů")
logger.info(f"Stahuji detaily pro filtrování panelu/sídlišť...")
logger.info(f"{'=' * 60}")
# Fetch details and filter
results = []
@@ -229,19 +267,26 @@ def scrape():
excluded_no_detail = 0
excluded_small = 0
cache_hits = 0
details_fetched = 0
for i, estate in enumerate(unique_estates):
# Stop if max_properties reached
if max_properties is not None and details_fetched >= max_properties:
logger.debug(f"Max properties limit reached: {max_properties}")
break
hash_id = estate.get("hash_id")
gps = estate.get("gps", {})
if not gps or not gps.get("lat") or not gps.get("lon"):
excluded_no_gps += 1
logger.debug(f"Filter: hash_id={hash_id} - excluded (no GPS)")
continue
# Check cache — if hash_id exists and price unchanged, reuse
cached = cache.get(hash_id)
if cached and cached.get("price") == estate.get("price", 0):
cache_hits += 1
logger.debug(f"Cache hit for hash_id={hash_id}")
results.append(cached)
continue
@@ -250,13 +295,15 @@ def scrape():
detail = get_estate_detail(hash_id)
if not detail:
excluded_no_detail += 1
logger.debug(f"Filter: hash_id={hash_id} - excluded (no detail)")
continue
# Check panel / sídliště
is_excluded, reason = is_panel_or_sidliste(detail)
if is_excluded:
excluded_panel += 1
print(f" ✗ Vyloučen #{hash_id}: {reason}")
logger.debug(f"Filter: hash_id={hash_id} - excluded (panel/sídliště): {reason}")
logger.info(f"✗ Vyloučen #{hash_id}: {reason}")
continue
# Parse floor
@@ -276,7 +323,8 @@ def scrape():
# Filter by minimum area
if area is not None and area < MIN_AREA:
excluded_small += 1
print(f" ✗ Vyloučen #{hash_id}: malá plocha ({area} m²)")
logger.debug(f"Filter: hash_id={hash_id} - excluded (area {area} m² < {MIN_AREA} m²)")
logger.info(f"✗ Vyloučen #{hash_id}: malá plocha ({area} m²)")
continue
# Get building type
@@ -311,23 +359,44 @@ def scrape():
"ownership": ownership,
"url": sreality_url(hash_id, seo),
"image": (estate.get("_links", {}).get("images", [{}])[0].get("href", "") if estate.get("_links", {}).get("images") else ""),
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
"first_seen": cached.get("first_seen", datetime.now().strftime("%Y-%m-%d")) if cached else datetime.now().strftime("%Y-%m-%d"),
"last_changed": datetime.now().strftime("%Y-%m-%d"),
}
if not validate_listing(result, "sreality"):
continue
results.append(result)
details_fetched += 1
if (i + 1) % 20 == 0:
print(f" Zpracováno {i + 1}/{len(unique_estates)} ...")
logger.info(f"Zpracováno {i + 1}/{len(unique_estates)} ...")
print(f"\n{'=' * 60}")
print(f"Výsledky:")
print(f" Celkem staženo: {len(unique_estates)}")
print(f" Z cache (přeskočeno): {cache_hits}")
print(f" Vyloučeno (panel/síd): {excluded_panel}")
print(f" Vyloučeno (<{MIN_AREA} m²): {excluded_small}")
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
print(f" Vyloučeno (bez detailu): {excluded_no_detail}")
print(f" ✓ Vyhovující byty: {len(results)}")
print(f"{'=' * 60}")
logger.info(f"\n{'=' * 60}")
logger.info(f"Výsledky:")
logger.info(f" Celkem staženo: {len(unique_estates)}")
logger.info(f" Z cache (přeskočeno): {cache_hits}")
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
logger.info(f" Vyloučeno (<{MIN_AREA} m²): {excluded_small}")
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
logger.info(f" Vyloučeno (bez detailu): {excluded_no_detail}")
logger.info(f" ✓ Vyhovující byty: {len(results)}")
logger.info(f"{'=' * 60}")
write_stats(STATS_FILE, {
"source": "Sreality",
"timestamp": _run_ts,
"duration_sec": round(time.time() - _run_start, 1),
"success": True,
"accepted": len(results),
"fetched": len(unique_estates),
"cache_hits": cache_hits,
"excluded": {
"panel/síd": excluded_panel,
"<69 m²": excluded_small,
"bez GPS": excluded_no_gps,
"bez detailu": excluded_no_detail,
},
})
return results
@@ -336,20 +405,70 @@ def scrape():
def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
"""Generate an interactive Leaflet.js HTML map."""
# Color by disposition
color_map = {
"3+kk": "#2196F3", # blue
"3+1": "#4CAF50", # green
"4+kk": "#FF9800", # orange
"4+1": "#F44336", # red
"5+kk": "#9C27B0", # purple
"5+1": "#795548", # brown
"6+": "#607D8B", # grey-blue
}
# Color by price per m² — cool blue→warm red scale, no yellow
# Thresholds based on Prague market distribution (p25=120k, p50=144k, p75=162k)
price_color_scale = [
(110_000, "#1565C0"), # < 110k/m² → deep blue (levné)
(130_000, "#42A5F5"), # 110–130k → light blue
(150_000, "#66BB6A"), # 130–150k → green (střed)
(165_000, "#EF6C00"), # 150–165k → dark orange
(float("inf"), "#C62828"), # > 165k → dark red (drahé)
]
def price_color(estate: dict) -> str:
price = estate.get("price") or 0
area = estate.get("area") or 0
if not area:
return "#9E9E9E"
ppm2 = price / area
for threshold, color in price_color_scale:
if ppm2 < threshold:
return color
return "#E53935"
# Legend bands for info panel (built once)
price_legend_items = (
'<div style="margin-bottom:4px;font-size:12px;color:#555;font-weight:600;">Cena / m²:</div>'
)
bands = [
("#1565C0", "< 110 000 Kč/m²"),
("#42A5F5", "110 130 000 Kč/m²"),
("#66BB6A", "130 150 000 Kč/m²"),
("#EF6C00", "150 165 000 Kč/m²"),
("#C62828", "> 165 000 Kč/m²"),
("#9E9E9E", "cena/plocha neuvedena"),
]
for bcolor, blabel in bands:
price_legend_items += (
f'<div class="price-band" data-color="{bcolor}" onclick="toggleColorFilter(\'{bcolor}\')" '
f'style="display:flex;align-items:center;gap:6px;margin:2px 0;padding:2px 4px;'
f'border-radius:4px;border:2px solid transparent;">'
f'<span style="width:14px;height:14px;border-radius:50%;background:{bcolor};'
f'display:inline-block;border:2px solid white;box-shadow:0 1px 3px rgba(0,0,0,0.3);flex-shrink:0;"></span>'
f'<span>{blabel}</span></div>'
)
price_legend_items += (
'<div id="price-filter-reset" style="display:none;margin:3px 0 0 4px;">'
'<a href="#" onclick="resetColorFilter();return false;" '
'style="font-size:11px;color:#1976D2;text-decoration:none;">✕ Zobrazit všechny ceny</a>'
'</div>'
)
# New marker indicator — bigger dot, no extra border
price_legend_items += (
'<div style="display:flex;align-items:center;gap:6px;margin:6px 0 0 0;'
'padding-top:6px;border-top:1px solid #eee;">'
'<span style="display:inline-flex;align-items:center;gap:3px;flex-shrink:0;">'
'<span style="width:14px;height:14px;border-radius:50%;background:#66BB6A;'
'display:inline-block;box-shadow:0 1px 3px rgba(0,0,0,0.3);"></span>'
'<span style="font-size:8px;font-weight:700;background:#FFD600;color:#333;'
'padding:1px 3px;border-radius:2px;">NEW</span>'
'</span>'
'<span>Nové (≤ 1 den)</span></div>'
)
markers_js = ""
for e in estates:
color = color_map.get(e["disposition"], "#999999")
color = price_color(e)
floor_text = f'{e["floor"]}. NP' if e["floor"] else "neuvedeno"
area_text = f'{e["area"]}' if e["area"] else "neuvedeno"
building_text = e["building_type"] or "neuvedeno"
@@ -366,18 +485,42 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
source_label = source_labels.get(source, source)
source_color = source_colors.get(source, "#999")
hash_id = e.get("hash_id", "")
hash_id = f"{source}_{e.get('hash_id', '')}"
first_seen = e.get("first_seen", "")
last_changed = e.get("last_changed", "")
today = datetime.now().strftime("%Y-%m-%d")
yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
is_new = first_seen in (today, yesterday)
new_badge = (
'<span style="margin-left:6px;font-size:11px;background:#FFD600;color:#333;'
'padding:1px 6px;border-radius:3px;font-weight:bold;">NOVÉ</span>'
if is_new else ""
)
date_parts = []
if first_seen:
date_parts.append(f'Přidáno: {first_seen}')
if last_changed and last_changed != first_seen:
date_parts.append(f'Změněno: {last_changed}')
date_row = (
f'<span style="font-size:11px;color:#888;">{"&nbsp;·&nbsp;".join(date_parts)}</span><br>'
if date_parts else ""
)
popup = (
f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}">'
f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}" data-first-seen="{first_seen}" data-last-changed="{last_changed}">'
f'<b style="font-size:14px;">{format_price(e["price"])}</b>'
f'<span style="margin-left:8px;font-size:11px;background:{source_color};color:white;'
f'padding:1px 6px;border-radius:3px;">{source_label}</span><br>'
f'padding:1px 6px;border-radius:3px;">{source_label}</span>{new_badge}<br>'
f'<span style="color:#666;">{e["disposition"]} | {area_text} | {floor_text}</span>'
f'{floor_note}<br><br>'
f'<b>{e["locality"]}</b><br>'
f'Stavba: {building_text}<br>'
f'Vlastnictví: {ownership_text}<br><br>'
f'Vlastnictví: {ownership_text}<br>'
f'{date_row}'
f'<br>'
f'<a href="{e["url"]}" target="_blank" '
f'style="color:{source_color};text-decoration:none;font-weight:bold;">'
f'→ Otevřít na {source_label}</a>'
@@ -401,26 +544,32 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
popup = popup.replace("'", "\\'").replace("\n", "")
is_fav = source in ("psn", "cityhome")
marker_fn = "addHeartMarker" if is_fav else "addMarker"
if is_fav:
marker_fn = "addHeartMarker"
elif is_new:
marker_fn = "addNewMarker"
else:
marker_fn = "addMarker"
markers_js += (
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}');\n"
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}', '{first_seen}', '{last_changed}');\n"
)
# Build legend
legend_items = ""
# Build legend — price per m² bands + disposition counts
legend_items = price_legend_items
# Disposition counts below the color legend
disp_counts = {}
for e in estates:
d = e["disposition"]
disp_counts[d] = disp_counts.get(d, 0) + 1
for disp, color in color_map.items():
count = disp_counts.get(disp, 0)
if count > 0:
disp_order = ["3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+"]
disp_summary = ", ".join(
f"{d} ({disp_counts[d]})" for d in disp_order if d in disp_counts
)
legend_items += (
f'<div style="display:flex;align-items:center;gap:6px;margin:3px 0;">'
f'<span style="width:14px;height:14px;border-radius:50%;'
f'background:{color};display:inline-block;border:2px solid white;'
f'box-shadow:0 1px 3px rgba(0,0,0,0.3);"></span>'
f'<span>{disp} ({count})</span></div>'
f'<div style="margin-top:8px;padding-top:6px;border-top:1px solid #eee;'
f'font-size:12px;color:#666;">{disp_summary}</div>'
)
# Heart marker legend for PSN/CityHome
@@ -456,6 +605,7 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
body {{ font-family: system-ui, -apple-system, sans-serif; }}
#map {{ width: 100%; height: 100vh; }}
.heart-icon {{ background: none !important; border: none !important; }}
.star-icon {{ background: none !important; border: none !important; }}
.rate-btn:hover {{ background: #f0f0f0 !important; }}
.rate-btn.active-fav {{ background: #FFF9C4 !important; border-color: #FFC107 !important; }}
.rate-btn.active-rej {{ background: #FFEBEE !important; border-color: #F44336 !important; }}
@@ -466,32 +616,73 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
}}
.marker-favorite {{ animation: pulse-glow 2s ease-in-out infinite; border-radius: 50%; }}
.heart-icon-fav svg path {{ stroke: gold !important; stroke-width: 2.5 !important; filter: drop-shadow(0 0 4px rgba(255,193,7,0.7)); }}
.heart-icon-rej {{ opacity: 0.2 !important; }}
.heart-icon-rej {{ opacity: 0.4 !important; filter: grayscale(1); }}
.reject-overlay {{ background: none !important; border: none !important; pointer-events: none !important; }}
.new-badge-icon {{ background: none !important; border: none !important; pointer-events: none !important; }}
.new-badge {{
font-size: 9px; font-weight: 700; color: #333; background: #FFD600;
padding: 1px 4px; border-radius: 3px; white-space: nowrap;
box-shadow: 0 1px 3px rgba(0,0,0,0.3); letter-spacing: 0.5px;
}}
.info-panel {{
position: absolute; top: 10px; right: 10px; z-index: 1000;
background: white; padding: 16px; border-radius: 10px;
box-shadow: 0 2px 12px rgba(0,0,0,0.15); max-width: 260px;
font-size: 13px; line-height: 1.5;
transition: transform 0.3s ease, opacity 0.3s ease;
}}
.info-panel.collapsed {{
transform: translateX(calc(100% + 20px));
opacity: 0; pointer-events: none;
}}
.panel-open-btn {{
position: absolute; top: 10px; right: 10px; z-index: 1001;
width: 40px; height: 40px; border-radius: 8px;
background: white; border: none; cursor: pointer;
box-shadow: 0 2px 12px rgba(0,0,0,0.15);
font-size: 20px; display: flex; align-items: center; justify-content: center;
transition: opacity 0.3s ease;
}}
.panel-open-btn.hidden {{ opacity: 0; pointer-events: none; }}
.panel-close-btn {{
position: absolute; top: 8px; right: 8px;
width: 28px; height: 28px; border-radius: 6px;
background: none; border: 1px solid #ddd; cursor: pointer;
font-size: 16px; display: flex; align-items: center; justify-content: center;
color: #888;
}}
.panel-close-btn:hover {{ background: #f0f0f0; color: #333; }}
.info-panel h2 {{ font-size: 16px; margin-bottom: 8px; }}
.info-panel .stats {{ color: #666; margin-bottom: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee; }}
.filter-section {{ margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; }}
.filter-section label {{ display: flex; align-items: center; gap: 6px; margin: 3px 0; cursor: pointer; }}
.price-band {{ cursor: pointer; transition: background 0.12s; }}
.price-band:hover {{ background: #f0f0f0; }}
.price-band.active {{ border-color: #333 !important; background: #e8f0fe; }}
.price-band.dimmed {{ opacity: 0.35; }}
.filter-section input[type="checkbox"] {{ accent-color: #1976D2; }}
#floor-filter {{ margin-top: 8px; }}
#floor-filter select {{ width: 100%; padding: 4px; border-radius: 4px; border: 1px solid #ccc; }}
.status-link {{ display: block; margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; text-align: center; }}
.status-link a {{ color: #1976D2; text-decoration: none; font-size: 12px; }}
@media (max-width: 600px) {{
.info-panel {{ max-width: calc(100vw - 60px); right: 10px; }}
.info-panel.collapsed {{ transform: translateX(calc(100% + 20px)); }}
.panel-close-btn {{ top: 6px; right: 6px; }}
}}
</style>
</head>
<body>
<div id="map"></div>
<div class="info-panel">
<button class="panel-open-btn hidden" id="panel-open-btn" onclick="togglePanel()">☰</button>
<div class="info-panel" id="info-panel">
<button class="panel-close-btn" id="panel-close-btn" onclick="togglePanel()">✕</button>
<h2>Byty v Praze</h2>
<div class="stats">
<div>Celkem: <b id="visible-count">{len(estates)}</b> bytů</div>
<div>Cena: {min_price} – {max_price}</div>
<div>Průměr: {avg_price}</div>
</div>
<div><b>Dispozice:</b></div>
{legend_items}
<div class="filter-section">
<b>Filtry:</b>
@@ -507,11 +698,23 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
</div>
<div style="margin-top:6px;">
<label>Max cena:
<select id="max-price" onchange="applyFilters()">
<option value="13500000">13 500 000 Kč</option>
<option value="12000000">12 000 000 Kč</option>
<option value="10000000">10 000 000 Kč</option>
<option value="8000000">8 000 000 Kč</option>
<input type="number" id="max-price" value="13500000" max="14000000" step="500000"
style="width:130px;padding:2px 4px;border:1px solid #ccc;border-radius:3px;"
onchange="applyFilters()" onkeyup="applyFilters()"> Kč
</label>
</div>
<div style="margin-top:6px;">
<label>Přidáno / změněno:
<select id="days-filter" onchange="applyFilters()" style="width:100%;padding:4px;border-radius:4px;border:1px solid #ccc;">
<option value="0">Vše</option>
<option value="1">za 1 den</option>
<option value="2">za 2 dny</option>
<option value="3">za 3 dny</option>
<option value="4">za 4 dny</option>
<option value="5">za 5 dní</option>
<option value="7">za 7 dní</option>
<option value="14">za 14 dní</option>
<option value="30">za 30 dní</option>
</select>
</label>
</div>
@@ -525,6 +728,7 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
Skrýt zamítnuté
</label>
</div>
<div class="status-link"><a href="/scrapers-status">Scraper status</a></div>
</div>
<script>
@@ -544,9 +748,39 @@ L.tileLayer('https://{{s}}.basemaps.cartocdn.com/light_only_labels/{{z}}/{{x}}/{
pane: 'shadowPane',
}}).addTo(map);
var selectedColors = [];
function toggleColorFilter(color) {{
var idx = selectedColors.indexOf(color);
if (idx >= 0) selectedColors.splice(idx, 1);
else selectedColors.push(color);
document.querySelectorAll('.price-band').forEach(function(el) {{
var c = el.getAttribute('data-color');
if (selectedColors.length === 0) {{
el.classList.remove('active', 'dimmed');
}} else if (selectedColors.indexOf(c) >= 0) {{
el.classList.add('active'); el.classList.remove('dimmed');
}} else {{
el.classList.add('dimmed'); el.classList.remove('active');
}}
}});
document.getElementById('price-filter-reset').style.display =
selectedColors.length > 0 ? 'block' : 'none';
applyFilters();
}}
function resetColorFilter() {{
selectedColors = [];
document.querySelectorAll('.price-band').forEach(function(el) {{
el.classList.remove('active', 'dimmed');
}});
document.getElementById('price-filter-reset').style.display = 'none';
applyFilters();
}}
var allMarkers = [];
function addMarker(lat, lon, color, popup, hashId) {{
function addMarker(lat, lon, color, popup, hashId, firstSeen, lastChanged) {{
var marker = L.circleMarker([lat, lon], {{
radius: 8,
fillColor: color,
@@ -555,11 +789,37 @@ function addMarker(lat, lon, color, popup, hashId) {{
opacity: 1,
fillOpacity: 0.85,
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId }};
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, firstSeen: firstSeen || '', lastChanged: lastChanged || '' }};
allMarkers.push(marker);
marker.addTo(map);
}}
function addNewMarker(lat, lon, color, popup, hashId, firstSeen, lastChanged) {{
var marker = L.circleMarker([lat, lon], {{
radius: 8,
fillColor: color,
color: '#fff',
weight: 2,
opacity: 1,
fillOpacity: 0.85,
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isNew: true, firstSeen: firstSeen || '', lastChanged: lastChanged || '' }};
allMarkers.push(marker);
marker.addTo(map);
var badge = L.marker([lat, lon], {{
icon: L.divIcon({{
className: 'new-badge-icon',
html: '<span class="new-badge">NEW</span>',
iconSize: [32, 14],
iconAnchor: [-6, 7],
}}),
interactive: false,
pane: 'markerPane',
}});
badge.addTo(map);
marker._newBadge = badge;
}}
function heartIcon(color) {{
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">'
+ '<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
@@ -575,11 +835,26 @@ function heartIcon(color) {{
}});
}}
function addHeartMarker(lat, lon, color, popup, hashId) {{
function starIcon() {{
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" viewBox="0 0 24 24">'
+ '<path d="M12 2l3.09 6.26L22 9.27l-5 4.87L18.18 22 12 18.27 '
+ '5.82 22 7 14.14 2 9.27l6.91-1.01L12 2z" '
+ 'fill="#FFC107" stroke="#F57F17" stroke-width="1" '
+ 'filter="drop-shadow(0 1px 3px rgba(0,0,0,0.3))"/></svg>';
return L.divIcon({{
html: svg,
className: 'star-icon',
iconSize: [28, 28],
iconAnchor: [14, 14],
popupAnchor: [0, -14],
}});
}}
function addHeartMarker(lat, lon, color, popup, hashId, firstSeen, lastChanged) {{
var marker = L.marker([lat, lon], {{
icon: heartIcon(color),
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true }};
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true, firstSeen: firstSeen || '', lastChanged: lastChanged || '' }};
allMarkers.push(marker);
marker.addTo(map);
}}
@@ -598,6 +873,41 @@ function loadRatings() {{
function saveRatings(ratings) {{
localStorage.setItem(RATINGS_KEY, JSON.stringify(ratings));
fetch('/api/ratings', {{
method: 'POST',
headers: {{'Content-Type': 'application/json'}},
body: JSON.stringify(ratings)
}}).catch(function() {{}});
}}
function addRejectStrike(marker) {{
removeRejectStrike(marker);
var color = marker._data.color || '#999';
// SVG "no entry" icon — circle with diagonal line, colored to match marker
var svg = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20">'
+ '<circle cx="12" cy="12" r="10" fill="none" stroke="' + color + '" stroke-width="2.5" opacity="0.85"/>'
+ '<line x1="5.5" y1="5.5" x2="18.5" y2="18.5" stroke="' + color + '" stroke-width="2.5" stroke-linecap="round" opacity="0.85"/>'
+ '</svg>';
var icon = L.divIcon({{
className: 'reject-overlay',
html: svg,
iconSize: [20, 20],
iconAnchor: [10, 10],
}});
var m = L.marker([marker._data.lat, marker._data.lon], {{
icon: icon,
interactive: false,
pane: 'markerPane',
}});
m.addTo(map);
marker._rejectStrike = m;
}}
function removeRejectStrike(marker) {{
if (marker._rejectStrike) {{
map.removeLayer(marker._rejectStrike);
marker._rejectStrike = null;
}}
}}
function applyMarkerStyle(marker, status) {{
@@ -614,16 +924,35 @@ function applyMarkerStyle(marker, status) {{
}}
}} else {{
if (status === 'fav') {{
marker.setStyle({{
radius: 12, fillOpacity: 1, weight: 3,
fillColor: marker._data.color, color: '#fff',
}});
if (marker._path) marker._path.classList.add('marker-favorite');
removeRejectStrike(marker);
if (marker._newBadge && map.hasLayer(marker._newBadge)) map.removeLayer(marker._newBadge);
if (!marker._data._origCircle) marker._data._origCircle = true;
var popup = marker.getPopup();
var popupContent = popup ? popup.getContent() : '';
var wasOnMap = map.hasLayer(marker);
if (wasOnMap) map.removeLayer(marker);
var starMarker = L.marker([marker._data.lat, marker._data.lon], {{
icon: starIcon(),
}}).bindPopup(popupContent);
starMarker._data = marker._data;
var idx = allMarkers.indexOf(marker);
if (idx !== -1) allMarkers[idx] = starMarker;
if (wasOnMap) starMarker.addTo(map);
}} else if (status === 'reject') {{
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
revertToCircle(marker, {{ radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1 }});
}} else {{
marker.setStyle({{
radius: 6, fillOpacity: 0.15, fillColor: '#999', color: '#bbb', weight: 1,
radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1,
}});
if (marker._path) marker._path.classList.remove('marker-favorite');
}}
// Add strikethrough line over the marker
addRejectStrike(marker);
if (marker._newBadge && map.hasLayer(marker._newBadge)) map.removeLayer(marker._newBadge);
}} else {{
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
revertToCircle(marker, {{ radius: 8, fillColor: marker._data.color, color: '#fff', weight: 2, fillOpacity: 0.85 }});
}} else {{
marker.setStyle({{
radius: 8, fillColor: marker._data.color, color: '#fff',
@@ -631,8 +960,25 @@ function applyMarkerStyle(marker, status) {{
}});
if (marker._path) marker._path.classList.remove('marker-favorite');
}}
if (marker._path) marker._path.classList.remove('marker-rejected');
removeRejectStrike(marker);
if (marker._newBadge && !map.hasLayer(marker._newBadge)) marker._newBadge.addTo(map);
}}
}}
}}
function revertToCircle(marker, style) {{
var popup = marker.getPopup();
var popupContent = popup ? popup.getContent() : '';
var wasOnMap = map.hasLayer(marker);
if (wasOnMap) map.removeLayer(marker);
var cm = L.circleMarker([marker._data.lat, marker._data.lon], style).bindPopup(popupContent);
cm._data = marker._data;
delete cm._data._starRef;
var idx = allMarkers.indexOf(marker);
if (idx !== -1) allMarkers[idx] = cm;
if (wasOnMap) cm.addTo(map);
}}
function rateMarker(marker, action) {{
var hashId = marker._data.hashId;
@@ -772,11 +1118,21 @@ map.on('popupopen', function(e) {{
// ── Filters ────────────────────────────────────────────────────
function applyFilters() {{
var minFloor = parseInt(document.getElementById('min-floor').value);
var maxPrice = parseInt(document.getElementById('max-price').value);
var maxPriceEl = document.getElementById('max-price');
var maxPrice = parseInt(maxPriceEl.value) || 14000000;
if (maxPrice > 14000000) {{ maxPrice = 14000000; maxPriceEl.value = 14000000; }}
var hideRejected = document.getElementById('hide-rejected').checked;
var daysFilter = parseInt(document.getElementById('days-filter').value) || 0;
var ratings = loadRatings();
var visible = 0;
var cutoff = null;
if (daysFilter > 0) {{
cutoff = new Date();
cutoff.setDate(cutoff.getDate() - daysFilter);
cutoff.setHours(0, 0, 0, 0);
}}
allMarkers.forEach(function(m) {{
var popup = m.getPopup().getContent();
var floorMatch = popup.match(/(\\d+)\\. NP/);
@@ -789,14 +1145,28 @@ function applyFilters() {{
if (floor !== null && floor < minFloor) show = false;
if (price > maxPrice) show = false;
if (cutoff) {{
var fs = m._data.firstSeen ? new Date(m._data.firstSeen) : null;
var lc = m._data.lastChanged ? new Date(m._data.lastChanged) : null;
if (!((fs && fs >= cutoff) || (lc && lc >= cutoff))) show = false;
}}
if (selectedColors.length > 0 && selectedColors.indexOf(m._data.color) < 0) show = false;
var r = ratings[m._data.hashId];
if (hideRejected && r && r.status === 'reject') show = false;
if (show) {{
if (!map.hasLayer(m)) m.addTo(map);
visible++;
// Show strike line if rejected and visible
if (m._rejectStrike && !map.hasLayer(m._rejectStrike)) m._rejectStrike.addTo(map);
if (m._newBadge && !map.hasLayer(m._newBadge)) m._newBadge.addTo(map);
}} else {{
if (map.hasLayer(m)) map.removeLayer(m);
// Hide strike line when marker hidden
if (m._rejectStrike && map.hasLayer(m._rejectStrike)) map.removeLayer(m._rejectStrike);
if (m._newBadge && map.hasLayer(m._newBadge)) map.removeLayer(m._newBadge);
}}
}});
@@ -811,8 +1181,45 @@ function applyFilters() {{
document.getElementById('visible-count').textContent = visible;
}}
// Initialize ratings on load
// Initialize ratings: load from server, merge with localStorage, then restore
function initRatings() {{
var local = loadRatings();
fetch('/api/ratings')
.then(function(r) {{ return r.ok ? r.json() : null; }})
.then(function(server) {{
if (server && typeof server === 'object') {{
var merged = Object.assign({{}}, local, server);
localStorage.setItem(RATINGS_KEY, JSON.stringify(merged));
}}
restoreRatings();
updateRatingCounts();
}})
.catch(function() {{
restoreRatings();
updateRatingCounts();
}});
}}
initRatings();
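The merge precedence in `initRatings` (server entries overwrite local ones, as with `Object.assign({}, local, server)`) can be sketched in Python; the rating dicts below are hypothetical:

```python
def merge_ratings(local: dict, server: dict) -> dict:
    # Later source wins, mirroring Object.assign({}, local, server):
    # a rating synced from another device (server) overrides a stale local entry,
    # while local-only entries survive the merge.
    return {**local, **server}

local = {"sreality_111": {"status": "fav"}, "idnes_222": {"status": "reject"}}
server = {"sreality_111": {"status": "reject"}}
merged = merge_ratings(local, server)
assert merged["sreality_111"]["status"] == "reject"  # server wins on conflict
assert merged["idnes_222"]["status"] == "reject"     # local-only entry kept
```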
// ── Panel toggle ──────────────────────────────────────────────
function togglePanel() {{
var panel = document.getElementById('info-panel');
var openBtn = document.getElementById('panel-open-btn');
var isOpen = !panel.classList.contains('collapsed');
if (isOpen) {{
panel.classList.add('collapsed');
openBtn.classList.remove('hidden');
}} else {{
panel.classList.remove('collapsed');
openBtn.classList.add('hidden');
}}
}}
// On mobile, start with panel collapsed
if (window.innerWidth <= 600) {{
document.getElementById('info-panel').classList.add('collapsed');
document.getElementById('panel-open-btn').classList.remove('hidden');
}}
</script>
</body>
@@ -820,15 +1227,43 @@ restoreRatings();
path = Path(output_path)
path.write_text(html, encoding="utf-8")
print(f"\n✓ Mapa uložena: {path.resolve()}")
logger.info(f"\n✓ Mapa uložena: {path.resolve()}")
return str(path.resolve())
# ── Main ─────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Scrape apartments from Sreality.cz")
parser.add_argument("--max-pages", type=int, help="Maximum number of pages to scrape")
parser.add_argument("--max-properties", type=int, help="Maximum number of properties to fetch details for")
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
args = parser.parse_args()
# Configure logging
logging.basicConfig(
level=getattr(logging, args.log_level),
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
handlers=[logging.StreamHandler()]
)
_run_ts = datetime.now().isoformat(timespec="seconds")
start = time.time()
estates = scrape()
try:
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
except Exception as e:
logger.error(f"Scraper failed: {e}", exc_info=True)
write_stats(STATS_FILE, {
"source": "Sreality",
"timestamp": _run_ts,
"duration_sec": round(time.time() - start, 1),
"success": False,
"accepted": 0,
"fetched": 0,
"error": str(e),
})
raise
if estates:
# Save raw data as JSON backup
@@ -837,12 +1272,12 @@ if __name__ == "__main__":
json.dumps(estates, ensure_ascii=False, indent=2),
encoding="utf-8",
)
print(f"✓ Data uložena: {json_path.resolve()}")
logger.info(f"✓ Data uložena: {json_path.resolve()}")
# Generate map
map_path = generate_map(estates)
elapsed = time.time() - start
print(f"\n⏱ Celkový čas: {elapsed:.0f} s")
print(f"\nOtevři v prohlížeči:\n file://{map_path}")
logger.info(f"\n⏱ Celkový čas: {elapsed:.0f} s")
logger.info(f"\nOtevři v prohlížeči:\n file://{map_path}")
else:
print("\nŽádné byty neodpovídají kritériím :(")
logger.info("\nŽádné byty neodpovídají kritériím :(")


@@ -6,12 +6,20 @@ Výstup: byty_bezrealitky.json
"""
from __future__ import annotations
import argparse
from datetime import datetime
import json
import logging
import math
import re
import time
import urllib.error
import urllib.request
from pathlib import Path
from scraper_stats import write_stats, validate_listing
STATS_FILE = "stats_bezrealitky.json"
logger = logging.getLogger(__name__)
# ── Konfigurace ─────────────────────────────────────────────────────────────
@@ -63,21 +71,42 @@ HEADERS = {
BASE_URL = "https://www.bezrealitky.cz"
def fetch_url(url: str, retries: int = 3) -> str:
"""Fetch URL and return HTML string with retry on transient errors."""
for attempt in range(retries):
try:
logger.debug(f"HTTP GET request (attempt {attempt + 1}/{retries}): {url}")
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
html = resp.read().decode("utf-8")
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
return html
except urllib.error.HTTPError:
raise
except (ConnectionResetError, ConnectionError, urllib.error.URLError, OSError) as e:
if attempt < retries - 1:
wait = (attempt + 1) * 2
logger.warning(f"Connection error (retry {attempt + 1}/{retries} after {wait}s): {e}")
time.sleep(wait)
else:
logger.error(f"HTTP request failed after {retries} attempts: {e}", exc_info=True)
raise
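The retry policy in `fetch_url` above — three attempts, with waits growing linearly as `(attempt + 1) * 2` seconds (2 s, then 4 s) — can be isolated as a reusable sketch. The helper name and `base_wait` parameter are illustrative additions, not part of the source:

```python
import time

def with_retries(fn, retries: int = 3, base_wait: float = 2.0):
    # Mirrors fetch_url: retry transient connection errors, re-raise on the
    # final attempt. Waits grow as (attempt + 1) * base_wait seconds.
    for attempt in range(retries):
        try:
            return fn()
        except (ConnectionError, OSError):
            if attempt == retries - 1:
                raise
            time.sleep((attempt + 1) * base_wait)

calls = []
def flaky():
    calls.append(1)
    if len(calls) < 3:
        raise ConnectionError("transient")
    return "ok"

# base_wait shortened here so the sketch runs fast; fetch_url uses 2.0.
assert with_retries(flaky, base_wait=0.01) == "ok"
assert len(calls) == 3  # two failures, then success on the last attempt
```

Note that `fetch_url` deliberately re-raises `urllib.error.HTTPError` without retrying, since an HTTP error status is usually not transient; only connection-level errors go through the backoff loop.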
def fetch_page(page: int) -> tuple[list[dict], int]:
"""
Fetch a listing page from Bezrealitky.
Returns (list of advert dicts from Apollo cache, total count).
"""
url = f"{BASE_URL}/vypis/nabidka-prodej/byt/praha?page={page}"
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
html = resp.read().decode("utf-8")
html = fetch_url(url)
match = re.search(
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
html, re.DOTALL
)
if not match:
logger.debug("No __NEXT_DATA__ script found in HTML")
return [], 0
data = json.loads(match.group(1))
@@ -98,6 +127,7 @@ def fetch_page(page: int) -> tuple[list[dict], int]:
if tc and tc > total:
total = tc
logger.debug(f"Page {page}: found {len(adverts)} adverts, total={total}")
return adverts, total
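`fetch_page` locates the embedded Next.js state with the `__NEXT_DATA__` regex shown above. A self-contained demonstration on simplified sample HTML; the real Bezrealitky payload nests an Apollo cache under `props`, which is omitted here for brevity:

```python
import json
import re

# Simplified stand-in for a Next.js page; the JSON shape is illustrative only.
SAMPLE_HTML = (
    '<html><body>'
    '<script id="__NEXT_DATA__" type="application/json">'
    '{"props": {"pageProps": {"total": 123}}}'
    '</script></body></html>'
)

match = re.search(
    r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
    SAMPLE_HTML, re.DOTALL,
)
data = json.loads(match.group(1)) if match else {}
total = data.get("props", {}).get("pageProps", {}).get("total", 0)
```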
@@ -105,15 +135,14 @@ def fetch_detail(uri: str) -> dict | None:
"""Fetch detail page for a listing."""
try:
url = f"{BASE_URL}/nemovitosti-byty-domy/{uri}"
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
html = resp.read().decode("utf-8")
html = fetch_url(url)
match = re.search(
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
html, re.DOTALL
)
if not match:
logger.debug("No __NEXT_DATA__ script found in detail page")
return None
data = json.loads(match.group(1))
@@ -124,10 +153,11 @@ def fetch_detail(uri: str) -> dict | None:
if key.startswith("Advert:") and isinstance(val, dict):
# Detail pages have many more fields
if "construction" in val or "etage" in val or "ownership" in val:
logger.debug(f"Detail found for {uri}: construction={val.get('construction')}, etage={val.get('etage')}")
return val
except Exception as e:
print(f" Warning: detail failed for {uri}: {e}")
logger.warning(f"Detail failed for {uri}: {e}", exc_info=True)
return None
@@ -152,35 +182,45 @@ def load_cache(json_path: str = "byty_bezrealitky.json") -> dict[int, dict]:
return {}
def scrape():
def scrape(max_pages: int | None = None, max_properties: int | None = None):
_run_start = time.time()
_run_ts = datetime.now().isoformat(timespec="seconds")
cache = load_cache()
print("=" * 60)
print("Stahuji inzeráty z Bezrealitky.cz")
print(f"Cena: do {format_price(MAX_PRICE)}")
print(f"Min. plocha: {MIN_AREA}")
print(f"Patro: od {MIN_FLOOR}. NP")
print(f"Region: Praha")
logger.info("=" * 60)
logger.info("Stahuji inzeráty z Bezrealitky.cz")
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
logger.info(f"Min. plocha: {MIN_AREA}")
logger.info(f"Patro: od {MIN_FLOOR}. NP")
logger.info(f"Region: Praha")
if cache:
print(f"Cache: {len(cache)} bytů z minulého běhu")
print("=" * 60)
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
if max_pages:
logger.info(f"Max. stran: {max_pages}")
if max_properties:
logger.info(f"Max. bytů: {max_properties}")
logger.info("=" * 60)
# Step 1: Fetch all listing pages
print("\nFáze 1: Stahování seznamu inzerátů...")
logger.info("\nFáze 1: Stahování seznamu inzerátů...")
all_adverts = {} # id -> advert dict (dedup)
page = 1
total = None
while True:
print(f" Strana {page} ...")
if max_pages and page > max_pages:
logger.debug(f"Max pages limit reached: {max_pages}")
break
logger.info(f"Strana {page} ...")
adverts, total_count = fetch_page(page)
if total is None and total_count > 0:
total = total_count
total_pages = math.ceil(total / PER_PAGE)
print(f" → Celkem {total} inzerátů, ~{total_pages} stran")
logger.info(f"→ Celkem {total} inzerátů, ~{total_pages} stran")
if not adverts:
logger.debug(f"No adverts found on page {page}, stopping")
break
for adv in adverts:
@@ -193,7 +233,7 @@ def scrape():
break
time.sleep(0.5)
print(f"\n Staženo: {len(all_adverts)} unikátních inzerátů")
logger.info(f"\nStaženo: {len(all_adverts)} unikátních inzerátů")
# Step 2: Pre-filter by disposition, price, area from list data
pre_filtered = []
@@ -203,47 +243,57 @@ def scrape():
excluded_no_gps = 0
for adv in all_adverts.values():
adv_id = adv.get("id")
disp = adv.get("disposition", "")
if disp not in WANTED_DISPOSITIONS:
excluded_disp += 1
logger.debug(f"Filter: id={adv_id} - excluded (disposition {disp})")
continue
price = adv.get("price", 0) or 0
if price > MAX_PRICE or price == 0:
excluded_price += 1
logger.debug(f"Filter: id={adv_id} - excluded (price {price})")
continue
surface = adv.get("surface")
if surface is not None and surface < MIN_AREA:
excluded_area += 1
logger.debug(f"Filter: id={adv_id} - excluded (area {surface} m²)")
continue
gps = adv.get("gps", {})
if not gps or not gps.get("lat") or not gps.get("lng"):
excluded_no_gps += 1
logger.debug(f"Filter: id={adv_id} - excluded (no GPS)")
continue
pre_filtered.append(adv)
print(f"\nPo předfiltraci:")
print(f" Vyloučeno (dispozice): {excluded_disp}")
print(f" Vyloučeno (cena): {excluded_price}")
print(f" Vyloučeno (plocha): {excluded_area}")
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
print(f" Zbývá: {len(pre_filtered)}")
logger.info(f"\nPo předfiltraci:")
logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
logger.info(f" Vyloučeno (cena): {excluded_price}")
logger.info(f" Vyloučeno (plocha): {excluded_area}")
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
logger.info(f" Zbývá: {len(pre_filtered)}")
# Step 3: Fetch details
print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
logger.info(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
results = []
excluded_panel = 0
excluded_floor = 0
excluded_detail = 0
cache_hits = 0
properties_fetched = 0
for i, adv in enumerate(pre_filtered):
if max_properties and properties_fetched >= max_properties:
logger.debug(f"Max properties limit reached: {max_properties}")
break
uri = adv.get("uri", "")
if not uri:
excluded_detail += 1
logger.debug(f"Filter: id={adv.get('id')} - excluded (no URI)")
continue
# Check cache — if hash_id exists and price unchanged, reuse
@@ -252,6 +302,7 @@ def scrape():
cached = cache.get(adv_id)
if cached and cached.get("price") == adv_price:
cache_hits += 1
logger.debug(f"Cache hit for id={adv_id}")
results.append(cached)
continue
@@ -260,26 +311,30 @@ def scrape():
if not detail:
excluded_detail += 1
logger.debug(f"Filter: id={adv_id} - excluded (detail fetch failed)")
continue
# Check construction — exclude panel
construction = detail.get("construction", "")
if construction == "PANEL":
excluded_panel += 1
print(f" ✗ Vyloučen #{adv['id']}: panel")
logger.debug(f"Filter: id={adv['id']} - excluded (panel construction)")
logger.info(f"✗ Vyloučen #{adv['id']}: panel")
continue
# Check situation — exclude sídliště
situation = detail.get("situation", "")
if situation and "HOUSING_ESTATE" in str(situation).upper():
excluded_panel += 1
print(f" ✗ Vyloučen #{adv['id']}: sídliště")
logger.debug(f"Filter: id={adv['id']} - excluded (housing estate)")
logger.info(f"✗ Vyloučen #{adv['id']}: sídliště")
continue
# Check floor (etage)
etage = detail.get("etage")
if etage is not None and etage < MIN_FLOOR:
excluded_floor += 1
logger.debug(f"Filter: id={adv_id} - excluded (floor {etage})")
continue
gps = adv.get("gps", {})
@@ -315,28 +370,83 @@ def scrape():
"url": f"{BASE_URL}/nemovitosti-byty-domy/{uri}",
"source": "bezrealitky",
"image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
"first_seen": cached.get("first_seen", datetime.now().strftime("%Y-%m-%d")) if cached else datetime.now().strftime("%Y-%m-%d"),
"last_changed": datetime.now().strftime("%Y-%m-%d"),
}
if not validate_listing(result, "bezrealitky"):
continue
results.append(result)
properties_fetched += 1
if (i + 1) % 20 == 0:
print(f" Zpracováno {i + 1}/{len(pre_filtered)} ...")
logger.info(f"Zpracováno {i + 1}/{len(pre_filtered)} ...")
print(f"\n{'=' * 60}")
print(f"Výsledky Bezrealitky:")
print(f" Předfiltrováno: {len(pre_filtered)}")
print(f" Z cache (přeskočeno): {cache_hits}")
print(f" Vyloučeno (panel/síd): {excluded_panel}")
print(f" Vyloučeno (patro): {excluded_floor}")
print(f" Vyloučeno (bez detailu): {excluded_detail}")
print(f" ✓ Vyhovující byty: {len(results)}")
print(f"{'=' * 60}")
logger.info(f"\n{'=' * 60}")
logger.info(f"Výsledky Bezrealitky:")
logger.info(f" Předfiltrováno: {len(pre_filtered)}")
logger.info(f" Z cache (přeskočeno): {cache_hits}")
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
logger.info(f" Vyloučeno (patro): {excluded_floor}")
logger.info(f" Vyloučeno (bez detailu): {excluded_detail}")
logger.info(f" ✓ Vyhovující byty: {len(results)}")
logger.info(f"{'=' * 60}")
write_stats(STATS_FILE, {
"source": "Bezrealitky",
"timestamp": _run_ts,
"duration_sec": round(time.time() - _run_start, 1),
"success": True,
"accepted": len(results),
"fetched": len(all_adverts),
"pages": page - 1,
"cache_hits": cache_hits,
"excluded": {
"dispozice": excluded_disp,
"cena": excluded_price,
"plocha": excluded_area,
"bez GPS": excluded_no_gps,
"panel/síd": excluded_panel,
"patro": excluded_floor,
"bez detailu": excluded_detail,
},
})
return results
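`write_stats` is imported from `scraper_stats.py` and not shown in this diff. One plausible minimal implementation, assuming it simply serializes the per-run stats dict as UTF-8 JSON (the real helper may do more, e.g. merge with previous runs):

```python
import json
from pathlib import Path

def write_stats(path: str, stats: dict) -> None:
    # Assumed shape of the shared helper: dump the stats dict verbatim.
    Path(path).write_text(
        json.dumps(stats, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
```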
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Scrape apartments from Bezrealitky.cz")
parser.add_argument("--max-pages", type=int, default=None,
help="Maximum number of listing pages to scrape")
parser.add_argument("--max-properties", type=int, default=None,
help="Maximum number of properties to fetch details for")
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
args = parser.parse_args()
# Configure logging
logging.basicConfig(
level=getattr(logging, args.log_level),
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
handlers=[logging.StreamHandler()]
)
_run_ts = datetime.now().isoformat(timespec="seconds")
start = time.time()
estates = scrape()
try:
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
except Exception as e:
logger.error(f"Scraper failed: {e}", exc_info=True)
write_stats(STATS_FILE, {
"source": "Bezrealitky",
"timestamp": _run_ts,
"duration_sec": round(time.time() - start, 1),
"success": False,
"accepted": 0,
"fetched": 0,
"error": str(e),
})
raise
if estates:
json_path = Path("byty_bezrealitky.json")
@@ -345,7 +455,7 @@ if __name__ == "__main__":
encoding="utf-8",
)
elapsed = time.time() - start
print(f"\n✓ Data uložena: {json_path.resolve()}")
print(f"⏱ Celkový čas: {elapsed:.0f} s")
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
else:
print("\nŽádné byty z Bezrealitek neodpovídají kritériím :(")
logger.info("\nŽádné byty z Bezrealitek neodpovídají kritériím :(")


@@ -6,11 +6,19 @@ Výstup: byty_cityhome.json
"""
from __future__ import annotations
import argparse
import json
import logging
import re
import time
import urllib.request
from datetime import datetime
from pathlib import Path
from scraper_stats import write_stats, validate_listing
STATS_FILE = "stats_cityhome.json"
logger = logging.getLogger(__name__)
# ── Konfigurace ─────────────────────────────────────────────────────────────
@@ -29,18 +37,26 @@ HEADERS = {
BASE_URL = "https://www.city-home.cz"
def fetch_url(url: str) -> str:
"""Fetch URL and return HTML string."""
for attempt in range(3):
def fetch_url(url: str, retries: int = 3) -> str:
"""Fetch URL and return HTML string. Raises HTTPError on 4xx/5xx."""
for attempt in range(retries):
try:
logger.debug(f"HTTP GET request (attempt {attempt + 1}/{retries}): {url}")
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
return resp.read().decode("utf-8")
html = resp.read().decode("utf-8")
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
return html
except urllib.error.HTTPError:
# Don't retry on HTTP errors (404, 403, etc.) — re-raise immediately
raise
except (ConnectionResetError, ConnectionError, urllib.error.URLError) as e:
if attempt < 2:
time.sleep((attempt + 1) * 2)
print(f" Retry {attempt + 1}: {e}")
if attempt < retries - 1:
wait = (attempt + 1) * 2
logger.warning(f"Connection error (retry {attempt + 1}/{retries} after {wait}s): {e}")
time.sleep(wait)
else:
logger.error(f"HTTP request failed after {retries} attempts: {e}", exc_info=True)
raise
@@ -114,31 +130,21 @@ def parse_filter_page(html: str) -> list[dict]:
if detail_url and not detail_url.startswith("http"):
detail_url = BASE_URL + detail_url
# Extract floor from cells — look for pattern like "3.NP" or "2.PP"
# Parse table cells: [unit_name, unit_type_label, address, floor, disposition, area, transaction, price]
cells = re.findall(r'<td[^>]*>(.*?)</td>', row_content, re.DOTALL)
floor = None
floor_text = ""
project_name = ""
cell_texts = [re.sub(r'<[^>]+>', '', c).strip() for c in cells]
for cell in cells:
cell_text = re.sub(r'<[^>]+>', '', cell).strip()
# Floor pattern
np_match = re.search(r'(\d+)\.\s*NP', cell_text)
pp_match = re.search(r'(\d+)\.\s*PP', cell_text)
# Cell[2] = address (e.g. "Žateckých 14"), cell[3] = floor (e.g. "3.NP")
project_address = cell_texts[2] if len(cell_texts) > 2 else ""
floor = None
if len(cell_texts) > 3:
np_match = re.search(r'(\d+)\.\s*NP', cell_texts[3])
pp_match = re.search(r'(\d+)\.\s*PP', cell_texts[3])
if np_match:
floor = int(np_match.group(1))
floor_text = cell_text
elif pp_match:
floor = -int(pp_match.group(1)) # Underground
floor_text = cell_text
# Extract project name — usually in a cell that's not a number/price/floor
for cell in cells:
cell_text = re.sub(r'<[^>]+>', '', cell).strip()
if cell_text and not re.match(r'^[\d\s.,]+$', cell_text) and "NP" not in cell_text and "PP" not in cell_text and "Kč" not in cell_text and "€" not in cell_text and "EUR" not in cell_text and "CZK" not in cell_text:
if len(cell_text) > 3 and cell_text != unit_name:
project_name = cell_text
break
floor = -int(pp_match.group(1))
listing = {
"price": int(cena.group(1)),
@@ -148,45 +154,77 @@ def parse_filter_page(html: str) -> list[dict]:
"project_id": project.group(1) if project else "",
"transaction": transaction.group(1) if transaction else "",
"disposition": dispozition.group(1) if dispozition else "",
"location": location.group(1) if location else "",
"url": detail_url,
"unit_name": unit_name,
"floor": floor,
"project_name": project_name,
"project_address": project_address,
}
listings.append(listing)
return listings
def extract_project_gps(html: str) -> dict[str, tuple[float, float]]:
"""Extract GPS coordinates for projects from locality pages."""
# Pattern in JS: ['<h4>Project Name</h4>...', 'LAT', 'LON', '1', 'Name']
gps_data = {}
for match in re.finditer(r"\['[^']*<h4>([^<]+)</h4>[^']*',\s*'([\d.]+)',\s*'([\d.]+)'", html):
name = match.group(1).strip()
lat = float(match.group(2))
lon = float(match.group(3))
gps_data[name] = (lat, lon)
return gps_data
def get_lokalita_urls(slug: str) -> list[str]:
"""Return candidate lokalita URLs to try in order."""
return [
f"{BASE_URL}/projekty/{slug}/lokalita",
f"{BASE_URL}/bytove-domy/{slug}/lokalita",
f"{BASE_URL}/bytove-domy/{slug}/lokalita1",
]
def scrape():
print("=" * 60)
print("Stahuji inzeráty z CityHome (city-home.cz)")
print(f"Cena: do {format_price(MAX_PRICE)}")
print(f"Min. plocha: {MIN_AREA}")
print(f"Patro: od {MIN_FLOOR}. NP")
print("=" * 60)
def extract_project_gps(html: str) -> tuple[float, float] | None:
"""Extract project GPS from lokalita page JS variable.
The page contains: var locations = [['<h4>Name</h4>...', 'LAT', 'LNG', 'CATEGORY', 'Label'], ...]
Category '1' = the project's own marker. Some projects have two cat-1 entries (data error);
in that case we pick the one whose name contains a digit and is not a transit landmark.
"""
block = re.search(r'var locations\s*=\s*\[(.*?)\];', html, re.DOTALL)
if not block:
return None
entries = re.findall(
r"'<h4>(.*?)</h4>.*?',\s*'([\d.]+)',\s*'([\d.]+)',\s*'1'",
block.group(0),
re.DOTALL,
)
if not entries:
return None
if len(entries) == 1:
return float(entries[0][1]), float(entries[0][2])
# Multiple cat-1 entries: pick the real project marker
transit_re = re.compile(r'nádraží|park|metro|tramvaj|autobus|zastávka', re.IGNORECASE)
for name, lat, lng in entries:
if re.search(r'\d', name) and not transit_re.search(name):
return float(lat), float(lng)
# Fallback: first entry
return float(entries[0][1]), float(entries[0][2])
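The two-entry disambiguation described in the docstring above can be exercised on a simplified sample of the lokalita-page JS. The marker names below are invented for illustration; the regexes and the selection rule (pick the category-1 entry whose name contains a digit and is not a transit landmark) mirror the function:

```python
import re

# Simplified lokalita-page JS with two category-1 markers (the data error
# the docstring describes): a transit landmark plus the project itself.
SAMPLE_JS = (
    "var locations = ["
    "['<h4>Hlavní nádraží</h4>popis', '50.0830', '14.4350', '1', 'A'],"
    "['<h4>Žateckých 14</h4>popis', '50.0650', '14.4280', '1', 'B']"
    "];"
)

block = re.search(r'var locations\s*=\s*\[(.*?)\];', SAMPLE_JS, re.DOTALL)
entries = re.findall(
    r"'<h4>(.*?)</h4>.*?',\s*'([\d.]+)',\s*'([\d.]+)',\s*'1'",
    block.group(0),
    re.DOTALL,
)
transit_re = re.compile(r'nádraží|park|metro|tramvaj|autobus|zastávka', re.IGNORECASE)
# Prefer the marker whose name contains a digit and is not a landmark.
picked = next(
    (e for e in entries if re.search(r'\d', e[0]) and not transit_re.search(e[0])),
    entries[0],
)
gps = (float(picked[1]), float(picked[2]))
```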
def scrape(max_pages: int | None = None, max_properties: int | None = None):
_run_start = time.time()
_run_ts = datetime.now().isoformat(timespec="seconds")
logger.info("=" * 60)
logger.info("Stahuji inzeráty z CityHome (city-home.cz)")
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
logger.info(f"Min. plocha: {MIN_AREA}")
logger.info(f"Patro: od {MIN_FLOOR}. NP")
if max_properties:
logger.info(f"Max. bytů: {max_properties}")
logger.info("=" * 60)
# Step 1: Fetch the main filter page
print("\nFáze 1: Stahování seznamu bytů...")
logger.info("\nFáze 1: Stahování seznamu bytů...")
html = fetch_url(f"{BASE_URL}/filtr-nemovitosti1")
all_listings = parse_filter_page(html)
print(f" Nalezeno: {len(all_listings)} jednotek")
logger.info(f"Nalezeno: {len(all_listings)} jednotek")
# Step 2: Collect unique project slugs from detail URLs to fetch GPS
print("\nFáze 2: Stahování GPS souřadnic projektů...")
logger.info("\nFáze 2: Stahování GPS souřadnic projektů...")
project_slugs = set()
for listing in all_listings:
url = listing.get("url", "")
@@ -198,23 +236,37 @@ def scrape():
# Fetch GPS for each project from locality pages
project_gps = {}
for slug in sorted(project_slugs):
time.sleep(0.5)
time.sleep(0.3)
gps = None
for url in get_lokalita_urls(slug):
try:
locality_url = f"{BASE_URL}/projekty/{slug}/lokalita"
loc_html = fetch_url(locality_url)
logger.debug(f"Fetching project GPS: {url}")
loc_html = fetch_url(url)
gps = extract_project_gps(loc_html)
if gps:
# Take first entry (the project itself)
first_name, (lat, lon) = next(iter(gps.items()))
project_gps[slug] = (lat, lon)
print(f"{slug}: {lat}, {lon}")
else:
print(f"{slug}: GPS nenalezeno")
break
except Exception as e:
print(f" {slug}: chyba ({e})")
logger.debug(f"GPS fetch failed for {url}: {e}")
continue
if gps:
project_gps[slug] = gps
logger.info(f"{slug}: {gps[0]}, {gps[1]}")
else:
logger.info(f"{slug}: GPS nenalezeno")
# Load previous output for first_seen/last_changed tracking
_prev_cache: dict[str, dict] = {}
_prev_path = Path("byty_cityhome.json")
if _prev_path.exists():
try:
for _item in json.loads(_prev_path.read_text(encoding="utf-8")):
_prev_cache[str(_item["hash_id"])] = _item
except Exception:
pass
# Step 3: Filter listings
print(f"\nFáze 3: Filtrování...")
logger.info(f"\nFáze 3: Filtrování...")
results = []
excluded_sold = 0
excluded_type = 0
@@ -223,45 +275,57 @@ def scrape():
excluded_area = 0
excluded_floor = 0
excluded_no_gps = 0
properties_fetched = 0
for listing in all_listings:
if max_properties and properties_fetched >= max_properties:
logger.debug(f"Max properties limit reached: {max_properties}")
break
unit_name = listing.get("unit_name", "unknown")
# Only available units
if listing["free"] != "yes":
excluded_sold += 1
logger.debug(f"Filter: {unit_name} - excluded (not free)")
continue
# Only apartments (unittype=2)
if listing["unittype"] != 2:
excluded_type += 1
logger.debug(f"Filter: {unit_name} - excluded (not apartment, unittype={listing['unittype']})")
continue
# Only sales
if listing["transaction"] != "prodej":
excluded_type += 1
logger.debug(f"Filter: {unit_name} - excluded (not sale, transaction={listing['transaction']})")
continue
# Disposition
disp = listing["disposition"]
if disp not in WANTED_DISPOSITIONS:
excluded_disp += 1
logger.debug(f"Filter: {unit_name} - excluded (disposition {disp})")
continue
# Price
price = listing["price"]
if price <= 0 or price > MAX_PRICE:
excluded_price += 1
logger.debug(f"Filter: {unit_name} - excluded (price {price})")
continue
# Area
area = listing["area"]
if area < MIN_AREA:
excluded_area += 1
logger.debug(f"Filter: {unit_name} - excluded (area {area} m²)")
continue
# Floor
floor = listing["floor"]
if floor is not None and floor < MIN_FLOOR:
excluded_floor += 1
logger.debug(f"Filter: {unit_name} - excluded (floor {floor})")
continue
# GPS from project
@@ -272,48 +336,116 @@ def scrape():
if not gps:
excluded_no_gps += 1
logger.debug(f"Filter: {unit_name} - excluded (no GPS for project {slug})")
continue
lat, lon = gps
# locality: use project address from cell (e.g. "Žateckých 14") + city from GPS lookup
project_address = listing.get("project_address", "")
# derive city from slug (GPS lookup key)
city_map = {
"karlinske-namesti-5": "Praha 8",
"melnicka-12": "Praha 7",
"na-vaclavce-34": "Praha 5",
"nad-kajetankou-12": "Praha 6",
"vosmikovych-3": "Praha 9",
"zateckych-14": "Praha 2",
}
city_str = city_map.get(slug, "Praha")
locality_str = f"{project_address}, {city_str}" if project_address else city_str
result = {
"hash_id": f"cityhome_{slug}_{listing['unit_name']}",
"name": f"Prodej bytu {disp} {area} m² — {listing['project_name']}",
"name": f"Prodej bytu {disp}, {int(area)} m² — {project_address}",
"price": price,
"price_formatted": format_price(price),
"locality": f"{listing['project_name']}, Praha",
"locality": locality_str,
"lat": lat,
"lon": lon,
"disposition": disp,
"floor": floor,
"area": area,
"area": float(area),
"building_type": "Cihlová", # CityHome renovuje cihlové domy
"ownership": "neuvedeno",
"url": url,
"source": "cityhome",
"image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
"first_seen": _prev_cache.get(f"cityhome_{slug}_{listing['unit_name']}", {}).get("first_seen", datetime.now().strftime("%Y-%m-%d")),
"last_changed": datetime.now().strftime("%Y-%m-%d") if _prev_cache.get(f"cityhome_{slug}_{listing['unit_name']}", {}).get("price") != price else _prev_cache[f"cityhome_{slug}_{listing['unit_name']}"].get("last_changed", datetime.now().strftime("%Y-%m-%d")),
}
if not validate_listing(result, "cityhome"):
continue
results.append(result)
properties_fetched += 1
print(f"\n{'=' * 60}")
print(f"Výsledky CityHome:")
print(f" Celkem jednotek: {len(all_listings)}")
print(f" Vyloučeno (prodáno): {excluded_sold}")
print(f" Vyloučeno (typ): {excluded_type}")
print(f" Vyloučeno (dispozice): {excluded_disp}")
print(f" Vyloučeno (cena): {excluded_price}")
print(f" Vyloučeno (plocha): {excluded_area}")
print(f" Vyloučeno (patro): {excluded_floor}")
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
print(f" ✓ Vyhovující byty: {len(results)}")
print(f"{'=' * 60}")
logger.info(f"\n{'=' * 60}")
logger.info(f"Výsledky CityHome:")
logger.info(f" Celkem jednotek: {len(all_listings)}")
logger.info(f" Vyloučeno (prodáno): {excluded_sold}")
logger.info(f" Vyloučeno (typ): {excluded_type}")
logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
logger.info(f" Vyloučeno (cena): {excluded_price}")
logger.info(f" Vyloučeno (plocha): {excluded_area}")
logger.info(f" Vyloučeno (patro): {excluded_floor}")
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
logger.info(f" ✓ Vyhovující byty: {len(results)}")
logger.info(f"{'=' * 60}")
write_stats(STATS_FILE, {
"source": "CityHome",
"timestamp": _run_ts,
"duration_sec": round(time.time() - _run_start, 1),
"success": True,
"accepted": len(results),
"fetched": len(all_listings),
"excluded": {
"prodáno": excluded_sold,
"typ": excluded_type,
"dispozice": excluded_disp,
"cena": excluded_price,
"plocha": excluded_area,
"patro": excluded_floor,
"bez GPS": excluded_no_gps,
},
})
return results
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Scrape apartments from CityHome")
parser.add_argument("--max-pages", type=int, default=None,
help="Maximum number of listing pages to scrape (not applicable for CityHome)")
parser.add_argument("--max-properties", type=int, default=None,
help="Maximum number of properties to include in results")
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
args = parser.parse_args()
# Configure logging
logging.basicConfig(
level=getattr(logging, args.log_level),
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
handlers=[logging.StreamHandler()]
)
_run_ts = datetime.now().isoformat(timespec="seconds")
start = time.time()
estates = scrape()
try:
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
except Exception as e:
logger.error(f"Scraper failed: {e}", exc_info=True)
write_stats(STATS_FILE, {
"source": "CityHome",
"timestamp": _run_ts,
"duration_sec": round(time.time() - start, 1),
"success": False,
"accepted": 0,
"fetched": 0,
"error": str(e),
})
raise
if estates:
json_path = Path("byty_cityhome.json")
@@ -322,7 +454,7 @@ if __name__ == "__main__":
encoding="utf-8",
)
elapsed = time.time() - start
print(f"\n✓ Data uložena: {json_path.resolve()}")
print(f"⏱ Celkový čas: {elapsed:.0f} s")
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
else:
print("\nŽádné byty z CityHome neodpovídají kritériím :(")
logger.info("\nŽádné byty z CityHome neodpovídají kritériím :(")


@@ -6,14 +6,21 @@ Výstup: byty_idnes.json
"""
from __future__ import annotations
import argparse
from datetime import datetime
import json
import logging
import math
import re
import time
import urllib.request
import urllib.parse
from html.parser import HTMLParser
from pathlib import Path
from scraper_stats import write_stats, validate_listing
STATS_FILE = "stats_idnes.json"
logger = logging.getLogger(__name__)
# ── Konfigurace ─────────────────────────────────────────────────────────────
@@ -51,17 +58,21 @@ def fetch_url(url: str) -> str:
"""Fetch URL and return HTML string with retry logic."""
for attempt in range(MAX_RETRIES):
try:
logger.debug(f"HTTP GET request (attempt {attempt + 1}/{MAX_RETRIES}): {url}")
logger.debug(f"Headers: {HEADERS}")
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
data = resp.read()
logger.debug(f"HTTP response: status={resp.status}, size={len(data)} bytes")
return data.decode("utf-8")
except (ConnectionResetError, ConnectionError, urllib.error.URLError,
OSError) as e:
if attempt < MAX_RETRIES - 1:
wait = (attempt + 1) * 3 # 3, 6, 9, 12s
print(f" Retry {attempt + 1}/{MAX_RETRIES} (wait {wait}s): {e}")
logger.warning(f"Connection error (retry {attempt + 1}/{MAX_RETRIES} after {wait}s): {e}")
time.sleep(wait)
else:
logger.error(f"HTTP request failed after {MAX_RETRIES} attempts: {e}", exc_info=True)
raise
@@ -269,38 +280,49 @@ def load_cache(json_path: str = "byty_idnes.json") -> dict[str, dict]:
return {}
def scrape():
def scrape(max_pages: int | None = None, max_properties: int | None = None):
_run_start = time.time()
_run_ts = datetime.now().isoformat(timespec="seconds")
cache = load_cache()
print("=" * 60)
print("Stahuji inzeráty z Reality iDNES")
print(f"Cena: do {format_price(MAX_PRICE)}")
print(f"Min. plocha: {MIN_AREA}")
print(f"Patro: od {MIN_FLOOR}. NP")
print(f"Region: Praha")
logger.info("=" * 60)
logger.info("Stahuji inzeráty z Reality iDNES")
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
logger.info(f"Min. plocha: {MIN_AREA}")
logger.info(f"Patro: od {MIN_FLOOR}. NP")
logger.info(f"Region: Praha")
if cache:
print(f"Cache: {len(cache)} bytů z minulého běhu")
print("=" * 60)
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
if max_pages:
logger.info(f"Max. stran: {max_pages}")
if max_properties:
logger.info(f"Max. bytů: {max_properties}")
logger.info("=" * 60)
# Step 1: Fetch listing pages
print("\nFáze 1: Stahování seznamu inzerátů...")
logger.info("\nFáze 1: Stahování seznamu inzerátů...")
all_listings = {} # id -> listing dict
page = 0
total = None
while True:
if max_pages and page >= max_pages:
logger.debug(f"Max pages limit reached: {max_pages}")
break
url = build_list_url(page)
print(f" Strana {page + 1} ...")
logger.info(f"Strana {page + 1} ...")
html = fetch_url(url)
if total is None:
total = parse_total_count(html)
total_pages = math.ceil(total / PER_PAGE) if total > 0 else 1
print(f" → Celkem {total} inzerátů, ~{total_pages} stran")
logger.info(f"→ Celkem {total} inzerátů, ~{total_pages} stran")
listings = parse_listings(html)
logger.debug(f"Page {page}: found {len(listings)} listings")
if not listings:
logger.debug(f"No listings found on page {page}, stopping")
break
for item in listings:
@@ -313,7 +335,7 @@ def scrape():
break
time.sleep(1.0)
print(f"\n Staženo: {len(all_listings)} unikátních inzerátů")
logger.info(f"\nStaženo: {len(all_listings)} unikátních inzerátů")
# Step 2: Pre-filter by price and area from list data
pre_filtered = []
@@ -322,40 +344,49 @@ def scrape():
excluded_disp = 0
for item in all_listings.values():
item_id = item["id"]
if item["price"] <= 0 or item["price"] > MAX_PRICE:
excluded_price += 1
logger.debug(f"Filter: id={item_id} - excluded (price {item['price']})")
continue
if item["area"] is not None and item["area"] < MIN_AREA:
excluded_area += 1
logger.debug(f"Filter: id={item_id} - excluded (area {item['area']} m²)")
continue
if item["disposition"] == "?":
excluded_disp += 1
logger.debug(f"Filter: id={item_id} - excluded (unknown disposition)")
continue
pre_filtered.append(item)
print(f"\nPo předfiltraci:")
print(f" Vyloučeno (cena): {excluded_price}")
print(f" Vyloučeno (plocha): {excluded_area}")
print(f" Vyloučeno (dispozice): {excluded_disp}")
print(f" Zbývá: {len(pre_filtered)}")
logger.info(f"\nPo předfiltraci:")
logger.info(f" Vyloučeno (cena): {excluded_price}")
logger.info(f" Vyloučeno (plocha): {excluded_area}")
logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
logger.info(f" Zbývá: {len(pre_filtered)}")
# Step 3: Fetch details for GPS, floor, construction
print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
logger.info(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
results = []
excluded_panel = 0
excluded_floor = 0
excluded_no_gps = 0
excluded_detail = 0
cache_hits = 0
properties_fetched = 0
for i, item in enumerate(pre_filtered):
if max_properties and properties_fetched >= max_properties:
logger.debug(f"Max properties limit reached: {max_properties}")
break
# Check cache — if hash_id exists and price unchanged, reuse
cached = cache.get(str(item["id"]))
if cached and cached.get("price") == item["price"]:
cache_hits += 1
logger.debug(f"Cache hit for id={item['id']}")
results.append(cached)
continue
@@ -365,34 +396,39 @@ def scrape():
try:
html = fetch_url(url)
except Exception as e:
print(f" Warning: detail failed for {item['id']}: {e}")
excluded_detail += 1
logger.warning(f"Detail failed for id={item['id']}: {e}")
continue
detail = parse_detail(html)
logger.debug(f"Detail parsed for id={item['id']}: lat={detail.get('lat')}, lon={detail.get('lon')}, floor={detail.get('floor')}")
# Must have GPS
if not detail.get("lat") or not detail.get("lon"):
excluded_no_gps += 1
logger.debug(f"Filter: id={item['id']} - excluded (no GPS)")
continue
# Check construction — exclude panel
construction = detail.get("construction", "")
if "panel" in construction:
excluded_panel += 1
print(f" ✗ Vyloučen {item['id'][:12]}...: panel ({construction})")
logger.debug(f"Filter: id={item['id']} - excluded (panel construction)")
logger.info(f"✗ Vyloučen {item['id'][:12]}...: panel ({construction})")
continue
# Check for sídliště in construction/description
if "sídliště" in construction or "sidliste" in construction:
excluded_panel += 1
print(f" ✗ Vyloučen {item['id'][:12]}...: sídliště")
logger.debug(f"Filter: id={item['id']} - excluded (housing estate)")
logger.info(f"✗ Vyloučen {item['id'][:12]}...: sídliště")
continue
# Check floor
floor = detail.get("floor")
if floor is not None and floor < MIN_FLOOR:
excluded_floor += 1
logger.debug(f"Filter: id={item['id']} - excluded (floor {floor})")
continue
# Map construction to Czech label
@@ -427,29 +463,84 @@ def scrape():
"url": item["url"],
"source": "idnes",
"image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
"first_seen": cached.get("first_seen", datetime.now().strftime("%Y-%m-%d")) if cached else datetime.now().strftime("%Y-%m-%d"),
"last_changed": datetime.now().strftime("%Y-%m-%d"),
}
if not validate_listing(result, "idnes"):
continue
results.append(result)
properties_fetched += 1
if (i + 1) % 20 == 0:
print(f" Zpracováno {i + 1}/{len(pre_filtered)} ...")
logger.info(f"Zpracováno {i + 1}/{len(pre_filtered)} ...")
print(f"\n{'=' * 60}")
print(f"Výsledky Reality iDNES:")
print(f" Předfiltrováno: {len(pre_filtered)}")
print(f" Z cache (přeskočeno): {cache_hits}")
print(f" Vyloučeno (panel/síd): {excluded_panel}")
print(f" Vyloučeno (patro): {excluded_floor}")
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
print(f" Vyloučeno (bez detailu): {excluded_detail}")
print(f" ✓ Vyhovující byty: {len(results)}")
print(f"{'=' * 60}")
logger.info(f"\n{'=' * 60}")
logger.info(f"Výsledky Reality iDNES:")
logger.info(f" Předfiltrováno: {len(pre_filtered)}")
logger.info(f" Z cache (přeskočeno): {cache_hits}")
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
logger.info(f" Vyloučeno (patro): {excluded_floor}")
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
logger.info(f" Vyloučeno (bez detailu): {excluded_detail}")
logger.info(f" ✓ Vyhovující byty: {len(results)}")
logger.info(f"{'=' * 60}")
write_stats(STATS_FILE, {
"source": "iDNES",
"timestamp": _run_ts,
"duration_sec": round(time.time() - _run_start, 1),
"success": True,
"accepted": len(results),
"fetched": len(all_listings),
"pages": page,
"cache_hits": cache_hits,
"excluded": {
"cena": excluded_price,
"plocha": excluded_area,
"dispozice": excluded_disp,
"panel/síd": excluded_panel,
"patro": excluded_floor,
"bez GPS": excluded_no_gps,
"bez detailu": excluded_detail,
},
})
return results
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Scrape apartments from Reality iDNES")
parser.add_argument("--max-pages", type=int, default=None,
help="Maximum number of listing pages to scrape")
parser.add_argument("--max-properties", type=int, default=None,
help="Maximum number of properties to fetch details for")
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
args = parser.parse_args()
# Configure logging
logging.basicConfig(
level=getattr(logging, args.log_level),
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
handlers=[logging.StreamHandler()]
)
_run_ts = datetime.now().isoformat(timespec="seconds")
start = time.time()
estates = scrape()
try:
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
except Exception as e:
logger.error(f"Scraper failed: {e}", exc_info=True)
write_stats(STATS_FILE, {
"source": "iDNES",
"timestamp": _run_ts,
"duration_sec": round(time.time() - start, 1),
"success": False,
"accepted": 0,
"fetched": 0,
"error": str(e),
})
raise
if estates:
json_path = Path("byty_idnes.json")
@@ -458,7 +549,7 @@ if __name__ == "__main__":
encoding="utf-8",
)
elapsed = time.time() - start
print(f"\n✓ Data uložena: {json_path.resolve()}")
print(f"⏱ Celkový čas: {elapsed:.0f} s")
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
else:
print("\nŽádné byty z Reality iDNES neodpovídají kritériím :(")
logger.info("\nŽádné byty z Reality iDNES neodpovídají kritériím :(")


@@ -1,16 +1,25 @@
#!/usr/bin/env python3
"""
PSN.cz scraper.
Stáhne byty na prodej v Praze z projektů PSN a vyfiltruje podle kritérií.
Stáhne byty na prodej z API /api/units-list — jeden požadavek, žádné stránkování.
Výstup: byty_psn.json
"""
from __future__ import annotations
import argparse
import json
import logging
import re
import subprocess
import time
from datetime import datetime
from pathlib import Path
from urllib.parse import urlencode
from scraper_stats import write_stats, validate_listing
STATS_FILE = "stats_psn.json"
logger = logging.getLogger(__name__)
# ── Konfigurace ─────────────────────────────────────────────────────────────
@@ -18,78 +27,43 @@ MAX_PRICE = 14_000_000
MIN_AREA = 69
MIN_FLOOR = 2
WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1"}
WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1", "5+kk a větší"}
# Pouze Praha — ostatní města (Brno, Pardubice, Špindlerův Mlýn) přeskočit
WANTED_CITIES = {"Praha"}
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
BASE_URL = "https://psn.cz"
# Known Prague project slugs with GPS (from research)
PRAGUE_PROJECTS = [
{"slug": "zit-branik", "name": "Žít Braník", "lat": 50.0353, "lon": 14.4125},
{"slug": "rostislavova-4", "name": "Rostislavova 4", "lat": 50.0620, "lon": 14.4463},
{"slug": "pod-drinopolem", "name": "Pod Drinopolem", "lat": 50.0851, "lon": 14.3720},
{"slug": "skyline-chodov", "name": "Skyline Chodov", "lat": 50.0418, "lon": 14.4990},
{"slug": "jitro", "name": "Jitro", "lat": 50.0729, "lon": 14.4768},
{"slug": "maroldka", "name": "Maroldka", "lat": 50.0614, "lon": 14.4517},
{"slug": "belehradska-29", "name": "Bělehradská 29", "lat": 50.0682, "lon": 14.4348},
{"slug": "jeseniova-93", "name": "Jeseniova 93", "lat": 50.0887, "lon": 14.4692},
{"slug": "vanguard", "name": "Vanguard", "lat": 50.0164, "lon": 14.4036},
{"slug": "vinohradska-160", "name": "Vinohradská 160", "lat": 50.0780, "lon": 14.4653},
{"slug": "hermanova24", "name": "Heřmanova 24", "lat": 50.1009, "lon": 14.4313},
{"slug": "vinohradska-8", "name": "Vinohradská 8", "lat": 50.0787, "lon": 14.4342},
{"slug": "bydleni-na-vysinach", "name": "Bydlení Na Výšinách", "lat": 50.1003, "lon": 14.4187},
{"slug": "bydleni-u-pekaren", "name": "Bydlení U Pekáren", "lat": 50.0555, "lon": 14.5414},
{"slug": "pechackova-6", "name": "Pechackova 6", "lat": 50.0734, "lon": 14.4063},
{"slug": "ahoj-vanguard", "name": "Ahoj Vanguard", "lat": 50.0164, "lon": 14.4033},
]
UNITS_API = f"{BASE_URL}/api/units-list"
def fetch_url(url: str) -> str:
"""Fetch URL via curl (urllib SSL too old for Cloudflare)."""
def fetch_json(url: str, retries: int = 3) -> dict:
"""Fetch JSON via curl (urllib SSL may fail on Cloudflare) with retry."""
for attempt in range(retries):
logger.debug(f"HTTP GET (attempt {attempt + 1}/{retries}): {url}")
result = subprocess.run(
["curl", "-s", "-L", "--max-time", "30",
"-H", f"User-Agent: {UA}",
"-H", "Accept: text/html",
"-H", "Accept: application/json",
url],
capture_output=True, text=True, timeout=60
)
if result.returncode != 0:
raise RuntimeError(f"curl failed ({result.returncode}): {result.stderr[:200]}")
return result.stdout
if result.returncode == 0:
return json.loads(result.stdout)
if attempt < retries - 1:
wait = (attempt + 1) * 2
logger.warning(f"curl failed (retry {attempt + 1}/{retries} after {wait}s): {result.stderr[:200]}")
time.sleep(wait)
else:
raise RuntimeError(f"curl failed after {retries} attempts ({result.returncode}): {result.stderr[:200]}")
def extract_units_from_html(html: str) -> list[dict]:
"""Extract unit JSON objects from raw HTML with escaped quotes."""
# The HTML contains RSC data with escaped JSON: \\"key\\":\\"value\\"
# Step 1: Unescape the double-backslash-quotes to regular quotes
cleaned = html.replace('\\"', '"')
# Step 2: Find each unit by looking for "title":"Byt and walking back to {
units = []
decoder = json.JSONDecoder()
for m in re.finditer(r'"title":"Byt', cleaned):
pos = m.start()
# Walk backwards to find the opening brace
depth = 0
found = False
for i in range(pos - 1, max(pos - 3000, 0), -1):
if cleaned[i] == '}':
depth += 1
elif cleaned[i] == '{':
if depth == 0:
try:
obj, end = decoder.raw_decode(cleaned, i)
if isinstance(obj, dict) and 'price_czk' in obj:
units.append(obj)
found = True
except (json.JSONDecodeError, ValueError):
pass
break
depth -= 1
return units
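The brace-walk above hinges on `json.JSONDecoder.raw_decode`, which parses exactly one JSON value starting at a given index and reports where it ended, ignoring any trailing text. A minimal standalone demonstration (the sample blob is invented):

```python
import json

# raw_decode parses a single JSON value starting at the given index and
# returns (obj, end_index); text after the value is ignored, which is what
# lets extract_units_from_html decode a unit object embedded in RSC data.
decoder = json.JSONDecoder()
blob = 'junk{"title":"Byt 3+kk","price_czk":9500000}more junk'
start = blob.index("{")
obj, end = decoder.raw_decode(blob, start)
print(obj["price_czk"])  # → 9500000
print(blob[end:])        # → more junk
```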
def fix_gps(lat, lng):
"""PSN má u některých projektů prohozené lat/lng — opravíme."""
if lat is not None and lng is not None and lat < 20 and lng > 20:
return lng, lat
return lat, lng
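A quick sanity check of the swap heuristic: Prague latitudes sit near 50° and longitudes near 14.4°, so a "latitude" under 20 paired with a "longitude" over 20 means the pair was stored reversed. The function is restated here so the sketch runs standalone:

```python
def fix_gps(lat, lng):
    """Swap coordinates stored in reverse order (lat < 20 while lng > 20)."""
    if lat is not None and lng is not None and lat < 20 and lng > 20:
        return lng, lat
    return lat, lng

print(fix_gps(14.4125, 50.0353))  # swapped input → (50.0353, 14.4125)
print(fix_gps(50.0353, 14.4125))  # correct input passes through unchanged
print(fix_gps(None, 14.4125))     # missing value is left for the caller to reject
```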
def format_price(price: int) -> str:
@@ -101,197 +75,249 @@ def format_price(price: int) -> str:
return " ".join(reversed(parts))
def scrape():
print("=" * 60)
print("Stahuji inzeráty z PSN.cz")
print(f"Cena: do {format_price(MAX_PRICE)}")
print(f"Min. plocha: {MIN_AREA}")
print(f"Patro: od {MIN_FLOOR}. NP")
print(f"Region: Praha ({len(PRAGUE_PROJECTS)} projektů)")
print("=" * 60)
def scrape(max_properties: int | None = None):
_run_start = time.time()
_run_ts = datetime.now().isoformat(timespec="seconds")
logger.info("=" * 60)
logger.info("Stahuji inzeráty z PSN.cz")
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
logger.info(f"Min. plocha: {MIN_AREA}")
logger.info(f"Patro: od {MIN_FLOOR}. NP")
logger.info(f"Region: Praha")
if max_properties:
logger.info(f"Max. bytů: {max_properties}")
logger.info("=" * 60)
# Fetch units from each Prague project
all_units = []
for proj in PRAGUE_PROJECTS:
page = 1
project_units = []
while True:
url = f"{BASE_URL}/projekt/{proj['slug']}?page={page}"
print(f" {proj['name']} — strana {page} ...")
time.sleep(0.5)
# Jediný API požadavek — vrátí všechny jednotky (cca 236)
params = urlencode({
"locale": "cs",
"filters": "{}",
"type": "list",
"order": "price-asc",
"offset": 0,
"limit": 500,
})
url = f"{UNITS_API}?{params}"
logger.info("Stahuji jednotky z API ...")
try:
html = fetch_url(url)
data = fetch_json(url)
except Exception as e:
print(f" Chyba: {e}")
break
logger.error(f"Chyba při stahování: {e}", exc_info=True)
write_stats(STATS_FILE, {
"source": "PSN",
"timestamp": _run_ts,
"duration_sec": round(time.time() - _run_start, 1),
"success": False,
"accepted": 0,
"fetched": 0,
"error": str(e),
})
return []
units = extract_units_from_html(html)
all_units = data.get("units", {}).get("data", [])
logger.info(f"Staženo jednotek celkem: {len(all_units)}")
if not units:
if page == 1:
print(f" → 0 jednotek")
break
# Load previous output for first_seen/last_changed tracking
_prev_cache: dict[str, dict] = {}
_prev_path = Path("byty_psn.json")
if _prev_path.exists():
try:
for _item in json.loads(_prev_path.read_text(encoding="utf-8")):
_prev_cache[str(_item["hash_id"])] = _item
except Exception:
pass
# Add project info to each unit
for unit in units:
if not unit.get("latitude") or not unit.get("longitude"):
unit["latitude"] = proj["lat"]
unit["longitude"] = proj["lon"]
unit["_project_name"] = proj["name"]
unit["_project_slug"] = proj["slug"]
project_units.extend(units)
if page == 1:
print(f"{len(units)} jednotek na stránce")
# Check if there might be more pages
# If we got fewer than expected or same units, stop
if len(units) < 10:
break
page += 1
if page > 10: # Safety limit
break
all_units.extend(project_units)
# Deduplicate by slug
seen_slugs = set()
unique_units = []
for u in all_units:
slug = u.get("slug", "")
if slug and slug not in seen_slugs:
seen_slugs.add(slug)
unique_units.append(u)
elif not slug:
unique_units.append(u)
print(f"\n Staženo celkem: {len(unique_units)} unikátních jednotek")
# Filter
print(f"\nFiltrování...")
# Filtrování
results = []
excluded_sold = 0
excluded_type = 0
excluded_disp = 0
excluded_price = 0
excluded_area = 0
excluded_floor = 0
excluded_panel = 0
excluded = {
"prodáno": 0,
"typ": 0,
"město": 0,
"dispozice": 0,
"cena": 0,
"plocha": 0,
"patro": 0,
}
properties_fetched = 0
for unit in unique_units:
# Only free units
for unit in all_units:
if max_properties and properties_fetched >= max_properties:
break
unit_id = unit.get("id", "?")
# Pouze prodej bytů (type_id=0)
if unit.get("type_id") != 0:
excluded["typ"] += 1
logger.debug(f"id={unit_id}: přeskočen (type_id={unit.get('type_id')}, není prodej bytu)")
continue
# Pouze volné (ne rezervované, prodané, v přípravě)
sale_status = unit.get("sale_status", "")
is_free = unit.get("is_free", False)
is_sold = unit.get("is_sold", False)
if is_sold or not is_free:
excluded_sold += 1
excluded["prodáno"] += 1
logger.debug(f"id={unit_id}: přeskočen (status={sale_status})")
continue
# Only apartments
category = str(unit.get("category", "")).lower()
if "byt" not in category and "ateliér" not in category:
excluded_type += 1
# Pouze Praha
city = (unit.get("location") or unit.get("address", {}).get("city") or "").strip()
# location field je typicky "Praha 4", "Praha 7" atd.
city_base = city.split(" ")[0] if city else ""
if city_base not in WANTED_CITIES:
excluded["město"] += 1
logger.debug(f"id={unit_id}: přeskočen (město={city})")
continue
# Disposition
# Dispozice
disp = unit.get("disposition", "")
if disp not in WANTED_DISPOSITIONS:
excluded_disp += 1
excluded["dispozice"] += 1
logger.debug(f"id={unit_id}: přeskočen (dispozice={disp})")
continue
# Price
price = unit.get("price_czk") or unit.get("action_price_czk") or 0
if price <= 0 or price > MAX_PRICE:
excluded_price += 1
# Cena
price = unit.get("action_price_czk") or unit.get("price_czk") or 0
if not price or price <= 0 or price > MAX_PRICE:
excluded["cena"] += 1
logger.debug(f"id={unit_id}: přeskočen (cena={price})")
continue
# Area
# Plocha
area = unit.get("total_area") or unit.get("floor_area") or 0
if area < MIN_AREA:
excluded_area += 1
excluded["plocha"] += 1
logger.debug(f"id={unit_id}: přeskočen (plocha={area} m²)")
continue
# Floor
# Patro
floor_str = str(unit.get("floor", ""))
floor = None
if floor_str:
try:
floor = int(floor_str)
except ValueError:
floor_match = re.search(r'(-?\d+)', floor_str)
if floor_match:
floor = int(floor_match.group(1))
m = re.search(r'(-?\d+)', floor_str)
if m:
floor = int(m.group(1))
if floor is not None and floor < MIN_FLOOR:
excluded_floor += 1
excluded["patro"] += 1
logger.debug(f"id={unit_id}: přeskočen (patro={floor})")
continue
# Construction — check for panel
build_type = str(unit.get("build_type", "")).lower()
if "panel" in build_type:
excluded_panel += 1
print(f" ✗ Vyloučen: panel ({build_type})")
# GPS — opravit prohozené souřadnice
lat_raw = unit.get("latitude")
lng_raw = unit.get("longitude")
lat, lng = fix_gps(lat_raw, lng_raw)
if not lat or not lng:
logger.warning(f"id={unit_id}: chybí GPS souřadnice, přeskakuji")
continue
# Build construction label
building_type = "neuvedeno"
if build_type and build_type != "nevybráno":
if "cihlo" in build_type or "cihla" in build_type:
building_type = "Cihlová"
elif "skelet" in build_type:
building_type = "Skeletová"
# Sestavit adresu pro locality
addr = unit.get("address") or {}
street = addr.get("street", "")
street_no = addr.get("street_no", "")
if street and street_no:
locality_str = f"{street} {street_no}, {city}"
elif street:
locality_str = f"{street}, {city}"
else:
building_type = build_type.capitalize()
project_name = unit.get("project", "")
locality_str = f"{project_name}, {city}" if project_name else city
lat = unit.get("latitude", 0)
lon = unit.get("longitude", 0)
slug = unit.get("slug", "")
project_slug = unit.get("_project_slug", "")
detail_url = f"{BASE_URL}/projekt/{project_slug}/{slug}" if slug else f"{BASE_URL}/projekt/{project_slug}"
# URL na detail jednotky
unit_slug = unit.get("slug", "")
project_slug = ""
# project_slug lze odvodit z projektu nebo z reference_no
# API nevrací project_slug přímo — použijeme reference_no nebo jen ID
reference_no = unit.get("reference_no", "")
if unit_slug:
detail_url = f"{BASE_URL}/prodej/{unit_slug}"
elif reference_no:
detail_url = f"{BASE_URL}/prodej/{reference_no}"
else:
detail_url = BASE_URL
result = {
"hash_id": unit.get("id", slug),
"name": f"Prodej bytu {disp} {area} m² — {unit.get('_project_name', '')}",
"hash_id": str(unit_id),
"name": f"Prodej bytu {disp}, {int(area)} m² — {unit.get('project') or locality_str}",
"price": int(price),
"price_formatted": format_price(int(price)),
"locality": f"{unit.get('street', unit.get('_project_name', ''))}, Praha",
"locality": locality_str,
"lat": lat,
"lon": lon,
"lon": lng,
"disposition": disp,
"floor": floor,
"area": area,
"building_type": building_type,
"ownership": unit.get("ownership", "neuvedeno") or "neuvedeno",
"area": float(area),
"building_type": "neuvedeno",
"ownership": "osobní",
"url": detail_url,
"source": "psn",
"image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
"first_seen": _prev_cache.get(str(unit_id), {}).get("first_seen", datetime.now().strftime("%Y-%m-%d")),
"last_changed": (
datetime.now().strftime("%Y-%m-%d")
if _prev_cache.get(str(unit_id), {}).get("price") != int(price)
else _prev_cache[str(unit_id)].get("last_changed", datetime.now().strftime("%Y-%m-%d"))
),
}
if not validate_listing(result, "psn"):
continue
results.append(result)
properties_fetched += 1
print(f"\n{'=' * 60}")
print(f"Výsledky PSN:")
print(f" Celkem jednotek: {len(unique_units)}")
print(f" Vyloučeno (prodáno): {excluded_sold}")
print(f" Vyloučeno (typ): {excluded_type}")
print(f" Vyloučeno (dispozice): {excluded_disp}")
print(f" Vyloučeno (cena): {excluded_price}")
print(f" Vyloučeno (plocha): {excluded_area}")
print(f" Vyloučeno (patro): {excluded_floor}")
print(f" Vyloučeno (panel): {excluded_panel}")
print(f" ✓ Vyhovující byty: {len(results)}")
print(f"{'=' * 60}")
logger.info(f"\n{'=' * 60}")
logger.info(f"Výsledky PSN:")
logger.info(f" Staženo jednotek: {len(all_units)}")
for reason, count in excluded.items():
if count:
logger.info(f" Vyloučeno ({reason}): {count}")
logger.info(f" ✓ Vyhovující byty: {len(results)}")
logger.info(f"{'=' * 60}")
write_stats(STATS_FILE, {
"source": "PSN",
"timestamp": _run_ts,
"duration_sec": round(time.time() - _run_start, 1),
"success": True,
"accepted": len(results),
"fetched": len(all_units),
"excluded": excluded,
})
return results
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Scrape apartments from PSN.cz")
parser.add_argument("--max-properties", type=int, default=None,
help="Maximum number of properties to include in results")
parser.add_argument("--log-level", type=str, default="INFO",
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
args = parser.parse_args()
logging.basicConfig(
level=getattr(logging, args.log_level),
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
handlers=[logging.StreamHandler()]
)
_run_ts = datetime.now().isoformat(timespec="seconds")
start = time.time()
estates = scrape()
try:
estates = scrape(max_properties=args.max_properties)
except Exception as e:
logger.error(f"Scraper failed: {e}", exc_info=True)
write_stats(STATS_FILE, {
"source": "PSN",
"timestamp": _run_ts,
"duration_sec": round(time.time() - start, 1),
"success": False,
"accepted": 0,
"fetched": 0,
"error": str(e),
})
raise
if estates:
json_path = Path("byty_psn.json")
@@ -300,7 +326,7 @@ if __name__ == "__main__":
encoding="utf-8",
)
elapsed = time.time() - start
print(f"\n✓ Data uložena: {json_path.resolve()}")
print(f"⏱ Celkový čas: {elapsed:.0f} s")
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
logger.info(f"⏱ Celkový čas: {elapsed:.1f} s")
else:
print("\nŽádné byty z PSN neodpovídají kritériím :(")
logger.info("\nŽádné byty z PSN neodpovídají kritériím :(")


@@ -6,12 +6,20 @@ Výstup: byty_realingo.json
"""
from __future__ import annotations
import argparse
from datetime import datetime
import json
import logging
import math
import re
import time
import urllib.error
import urllib.request
from pathlib import Path
from scraper_stats import write_stats, validate_listing
STATS_FILE = "stats_realingo.json"
logger = logging.getLogger(__name__)
# ── Konfigurace (sdílená se Sreality scraperem) ─────────────────────────────
@@ -48,6 +56,28 @@ HEADERS = {
BASE_URL = "https://www.realingo.cz"
def fetch_url(url: str, retries: int = 3) -> str:
"""Fetch URL and return HTML string with retry on transient errors."""
for attempt in range(retries):
try:
logger.debug(f"HTTP GET request (attempt {attempt + 1}/{retries}): {url}")
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
html = resp.read().decode("utf-8")
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
return html
except urllib.error.HTTPError:
# HTTP errors (4xx/5xx) are not transient; re-raise without retrying
raise
except (ConnectionResetError, ConnectionError, urllib.error.URLError, OSError) as e:
if attempt < retries - 1:
wait = (attempt + 1) * 2
logger.warning(f"Connection error (retry {attempt + 1}/{retries} after {wait}s): {e}")
time.sleep(wait)
else:
logger.error(f"HTTP request failed after {retries} attempts: {e}", exc_info=True)
raise
def fetch_listing_page(page: int = 1) -> tuple[list[dict], int]:
"""Fetch a page of Prague listings. Returns (items, total_count)."""
if page == 1:
@@ -55,19 +85,18 @@ def fetch_listing_page(page: int = 1) -> tuple[list[dict], int]:
else:
url = f"{BASE_URL}/prodej_byty/praha/{page}_strana/"
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
html = resp.read().decode("utf-8")
html = fetch_url(url)
match = re.search(
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
html, re.DOTALL
)
if not match:
logger.debug("No __NEXT_DATA__ script found in HTML")
return [], 0
data = json.loads(match.group(1))
offer_list = data["props"]["pageProps"]["store"]["offer"]["list"]
logger.debug(f"Page {page}: found {len(offer_list['data'])} items, total={offer_list['total']}")
return offer_list["data"], offer_list["total"]
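The `__NEXT_DATA__` extraction both fetch functions rely on can be exercised offline against a tiny synthetic page; the nested key path matches the code above, while the HTML snippet itself is invented:

```python
import json
import re

html = ('<html><script id="__NEXT_DATA__" type="application/json">'
        '{"props":{"pageProps":{"store":{"offer":{"list":'
        '{"data":[{"id":1}],"total":1}}}}}}'
        '</script></html>')
match = re.search(
    r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
    html, re.DOTALL,
)
data = json.loads(match.group(1))
offer_list = data["props"]["pageProps"]["store"]["offer"]["list"]
print(offer_list["total"], len(offer_list["data"]))  # → 1 1
```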
@@ -75,24 +104,23 @@ def fetch_detail(listing_url: str) -> dict | None:
"""Fetch detail page for a listing to get floor, building type, etc."""
try:
url = f"{BASE_URL}{listing_url}"
req = urllib.request.Request(url, headers=HEADERS)
resp = urllib.request.urlopen(req, timeout=30)
html = resp.read().decode("utf-8")
html = fetch_url(url)
match = re.search(
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
html, re.DOTALL
)
if not match:
logger.debug("No __NEXT_DATA__ script found in detail page")
return None
data = json.loads(match.group(1))
details = data["props"]["pageProps"]["store"]["offer"]["details"]
# Get first (only) detail entry
for detail_data in details.values():
logger.debug(f"Detail fetched for {listing_url}")
return detail_data
except Exception as e:
print(f" Warning: detail fetch failed for {listing_url}: {e}")
logger.warning(f"Detail fetch failed for {listing_url}: {e}", exc_info=True)
return None
@@ -117,34 +145,44 @@ def load_cache(json_path: str = "byty_realingo.json") -> dict[int, dict]:
return {}
def scrape():
def scrape(max_pages: int | None = None, max_properties: int | None = None):
_run_start = time.time()
_run_ts = datetime.now().isoformat(timespec="seconds")
cache = load_cache()
print("=" * 60)
print("Stahuji inzeráty z Realingo.cz")
print(f"Cena: do {format_price(MAX_PRICE)}")
print(f"Min. plocha: {MIN_AREA}")
print(f"Patro: od {MIN_FLOOR}. NP")
print(f"Region: Praha")
logger.info("=" * 60)
logger.info("Stahuji inzeráty z Realingo.cz")
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
logger.info(f"Min. plocha: {MIN_AREA}")
logger.info(f"Patro: od {MIN_FLOOR}. NP")
logger.info(f"Region: Praha")
if cache:
print(f"Cache: {len(cache)} bytů z minulého běhu")
print("=" * 60)
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
if max_pages:
logger.info(f"Max. stran: {max_pages}")
if max_properties:
logger.info(f"Max. bytů: {max_properties}")
logger.info("=" * 60)
# Step 1: Fetch all listing pages
print("\nFáze 1: Stahování seznamu inzerátů...")
logger.info("\nFáze 1: Stahování seznamu inzerátů...")
all_listings = []
page = 1
total = None
while True:
print(f" Strana {page} ...")
if max_pages and page > max_pages:
logger.debug(f"Max pages limit reached: {max_pages}")
break
logger.info(f"Strana {page} ...")
items, total_count = fetch_listing_page(page)
if total is None:
total = total_count
total_pages = math.ceil(total / PER_PAGE)
print(f" → Celkem {total} inzerátů, {total_pages} stran")
logger.info(f"→ Celkem {total} inzerátů, {total_pages} stran")
if not items:
logger.debug(f"No items found on page {page}, stopping")
break
all_listings.extend(items)
@@ -153,7 +191,7 @@ def scrape():
break
time.sleep(0.5)
print(f"\n Staženo: {len(all_listings)} inzerátů")
logger.info(f"\nStaženo: {len(all_listings)} inzerátů")
# Step 2: Pre-filter by category, price, area from listing data
pre_filtered = []
@@ -163,50 +201,60 @@ def scrape():
excluded_no_gps = 0
for item in all_listings:
item_id = item.get("id")
cat = item.get("category", "")
if cat not in WANTED_CATEGORIES:
excluded_category += 1
logger.debug(f"Filter: id={item_id} - excluded (category {cat})")
continue
price = item.get("price", {}).get("total", 0) or 0
if price > MAX_PRICE or price == 0:
excluded_price += 1
logger.debug(f"Filter: id={item_id} - excluded (price {price})")
continue
area = item.get("area", {}).get("main")
if area is not None and area < MIN_AREA:
excluded_area += 1
logger.debug(f"Filter: id={item_id} - excluded (area {area} m²)")
continue
loc = item.get("location", {})
if not loc.get("latitude") or not loc.get("longitude"):
excluded_no_gps += 1
logger.debug(f"Filter: id={item_id} - excluded (no GPS)")
continue
pre_filtered.append(item)
print(f"\nPo předfiltraci:")
print(f" Vyloučeno (dispozice): {excluded_category}")
print(f" Vyloučeno (cena): {excluded_price}")
print(f" Vyloučeno (plocha): {excluded_area}")
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
print(f" Zbývá: {len(pre_filtered)}")
logger.info(f"\nPo předfiltraci:")
logger.info(f" Vyloučeno (dispozice): {excluded_category}")
logger.info(f" Vyloučeno (cena): {excluded_price}")
logger.info(f" Vyloučeno (plocha): {excluded_area}")
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
logger.info(f" Zbývá: {len(pre_filtered)}")
# Step 3: Fetch details for remaining listings (floor, building type)
print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
logger.info(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
results = []
excluded_panel = 0
excluded_floor = 0
excluded_detail = 0
cache_hits = 0
properties_fetched = 0
for i, item in enumerate(pre_filtered):
if max_properties and properties_fetched >= max_properties:
logger.debug(f"Max properties limit reached: {max_properties}")
break
# Check cache — if hash_id exists and price unchanged, reuse
item_id = int(item["id"])
item_price = item.get("price", {}).get("total", 0) or 0
cached = cache.get(item_id)
if cached and cached.get("price") == item_price:
cache_hits += 1
logger.debug(f"Cache hit for id={item_id}")
results.append(cached)
continue
@@ -215,6 +263,7 @@ def scrape():
if not detail_data:
excluded_detail += 1
logger.debug(f"Filter: id={item_id} - excluded (detail fetch failed)")
continue
detail = detail_data.get("offer", {}).get("detail", {})
@@ -225,20 +274,23 @@ def scrape():
building_type = detail.get("buildingType", "")
if building_type == "PANEL":
excluded_panel += 1
print(f" ✗ Vyloučen #{item['id']}: panel")
logger.debug(f"Filter: id={item['id']} - excluded (panel construction)")
logger.info(f"✗ Vyloučen #{item['id']}: panel")
continue
# Check building position — exclude sídliště
building_position = detail.get("buildingPosition", "")
if building_position and "ESTATE" in str(building_position).upper():
excluded_panel += 1
print(f" ✗ Vyloučen #{item['id']}: sídliště")
logger.debug(f"Filter: id={item['id']} - excluded (building estate)")
logger.info(f"✗ Vyloučen #{item['id']}: sídliště")
continue
# Check floor
floor = detail.get("floor")
if floor is not None and floor < MIN_FLOOR:
excluded_floor += 1
logger.debug(f"Filter: id={item_id} - excluded (floor {floor})")
continue
# Map building type
@@ -275,28 +327,83 @@ def scrape():
"url": f"{BASE_URL}{item['url']}",
"source": "realingo",
"image": "",
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
"first_seen": cached.get("first_seen", datetime.now().strftime("%Y-%m-%d")) if cached else datetime.now().strftime("%Y-%m-%d"),
"last_changed": datetime.now().strftime("%Y-%m-%d"),
}
if not validate_listing(result, "realingo"):
continue
results.append(result)
properties_fetched += 1
if (i + 1) % 20 == 0:
print(f" Zpracováno {i + 1}/{len(pre_filtered)} ...")
logger.info(f"Zpracováno {i + 1}/{len(pre_filtered)} ...")
print(f"\n{'=' * 60}")
print(f"Výsledky Realingo:")
print(f" Předfiltrováno: {len(pre_filtered)}")
print(f" Z cache (přeskočeno): {cache_hits}")
print(f" Vyloučeno (panel/síd): {excluded_panel}")
print(f" Vyloučeno (patro): {excluded_floor}")
print(f" Vyloučeno (bez detailu): {excluded_detail}")
print(f" ✓ Vyhovující byty: {len(results)}")
print(f"{'=' * 60}")
logger.info(f"\n{'=' * 60}")
logger.info(f"Výsledky Realingo:")
logger.info(f" Předfiltrováno: {len(pre_filtered)}")
logger.info(f" Z cache (přeskočeno): {cache_hits}")
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
logger.info(f" Vyloučeno (patro): {excluded_floor}")
logger.info(f" Vyloučeno (bez detailu): {excluded_detail}")
logger.info(f" ✓ Vyhovující byty: {len(results)}")
logger.info(f"{'=' * 60}")
write_stats(STATS_FILE, {
"source": "Realingo",
"timestamp": _run_ts,
"duration_sec": round(time.time() - _run_start, 1),
"success": True,
"accepted": len(results),
"fetched": len(all_listings),
"pages": page - 1,
"cache_hits": cache_hits,
"excluded": {
"dispozice": excluded_category,
"cena": excluded_price,
"plocha": excluded_area,
"bez GPS": excluded_no_gps,
"panel/síd": excluded_panel,
"patro": excluded_floor,
"bez detailu": excluded_detail,
},
})
return results
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Scrape apartments from Realingo.cz")
parser.add_argument("--max-pages", type=int, default=None,
help="Maximum number of listing pages to scrape")
parser.add_argument("--max-properties", type=int, default=None,
help="Maximum number of properties to fetch details for")
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
args = parser.parse_args()
# Configure logging
logging.basicConfig(
level=getattr(logging, args.log_level),
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
handlers=[logging.StreamHandler()]
)
_run_ts = datetime.now().isoformat(timespec="seconds")
start = time.time()
estates = scrape()
try:
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
except Exception as e:
logger.error(f"Scraper failed: {e}", exc_info=True)
write_stats(STATS_FILE, {
"source": "Realingo",
"timestamp": _run_ts,
"duration_sec": round(time.time() - start, 1),
"success": False,
"accepted": 0,
"fetched": 0,
"error": str(e),
})
raise
if estates:
json_path = Path("byty_realingo.json")
@@ -305,7 +412,7 @@ if __name__ == "__main__":
encoding="utf-8",
)
elapsed = time.time() - start
print(f"\n✓ Data uložena: {json_path.resolve()}")
print(f"⏱ Celkový čas: {elapsed:.0f} s")
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
else:
print("\nŽádné byty z Realinga neodpovídají kritériím :(")
logger.info("\nŽádné byty z Realinga neodpovídají kritériím :(")

scraper_stats.py Normal file

@@ -0,0 +1,55 @@
"""Shared utilities for scraper run statistics and listing validation."""
from __future__ import annotations

import json
import logging
import os
from pathlib import Path

HERE = Path(__file__).parent
DATA_DIR = Path(os.environ.get("DATA_DIR", HERE))

_val_log = logging.getLogger(__name__)

_REQUIRED_FIELDS = ("hash_id", "price", "locality", "lat", "lon", "url", "source")


def validate_listing(listing: dict, context: str = "") -> bool:
    """
    Validate a listing dict before it is written to the output JSON.

    Returns True if valid, False if the listing should be skipped.
    Logs a warning for each invalid listing.
    """
    prefix = f"[{context}] " if context else ""
    for field in _REQUIRED_FIELDS:
        val = listing.get(field)
        if val is None or val == "":
            _val_log.warning(f"{prefix}Skipping listing — missing field '{field}': {listing.get('hash_id', '?')}")
            return False
    price = listing.get("price")
    if not isinstance(price, (int, float)) or price <= 0:
        _val_log.warning(f"{prefix}Skipping listing — invalid price={price!r}: {listing.get('hash_id', '?')}")
        return False
    lat, lon = listing.get("lat"), listing.get("lon")
    if not isinstance(lat, (int, float)) or not isinstance(lon, (int, float)):
        _val_log.warning(f"{prefix}Skipping listing — non-numeric GPS lat={lat!r} lon={lon!r}: {listing.get('hash_id', '?')}")
        return False
    if not (47.0 <= lat <= 52.0) or not (12.0 <= lon <= 19.0):
        _val_log.warning(f"{prefix}Skipping listing — GPS outside Czech Republic lat={lat} lon={lon}: {listing.get('hash_id', '?')}")
        return False
    area = listing.get("area")
    if area is not None and (not isinstance(area, (int, float)) or area <= 0):
        _val_log.warning(f"{prefix}Skipping listing — invalid area={area!r}: {listing.get('hash_id', '?')}")
        return False
    return True


def write_stats(filename: str, stats: dict) -> None:
    """Write scraper run stats dict to the data directory."""
    path = DATA_DIR / filename
    path.write_text(json.dumps(stats, ensure_ascii=False, indent=2), encoding="utf-8")
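For reference, the acceptance rules enforced by `validate_listing()` can be exercised standalone. The sketch below re-states the required-field, price, and GPS checks (field names copied from `_REQUIRED_FIELDS`; the sample listings are invented, not real data):

```python
# Simplified, self-contained re-statement of the validation rules in
# scraper_stats.py (no logging; sample listings below are hypothetical).
REQUIRED = ("hash_id", "price", "locality", "lat", "lon", "url", "source")

def is_valid(listing: dict) -> bool:
    # All required fields must be present and non-empty.
    if any(listing.get(f) in (None, "") for f in REQUIRED):
        return False
    # Price must be a positive number.
    if not isinstance(listing["price"], (int, float)) or listing["price"] <= 0:
        return False
    # GPS must be numeric and inside the Czech bounding box.
    lat, lon = listing["lat"], listing["lon"]
    if not isinstance(lat, (int, float)) or not isinstance(lon, (int, float)):
        return False
    return 47.0 <= lat <= 52.0 and 12.0 <= lon <= 19.0

ok = {"hash_id": "sreality_123", "price": 8_500_000, "locality": "Praha 4",
      "lat": 50.04, "lon": 14.44, "url": "https://example.com/1", "source": "sreality"}
bad_gps = dict(ok, hash_id="sreality_124", lat=40.0)  # outside CZ bounds

print(is_valid(ok), is_valid(bad_gps))  # → True False
```

Listings failing any check are dropped before they reach the output JSON, so one malformed scrape result cannot break the map.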

server.py Normal file

@@ -0,0 +1,477 @@
#!/usr/bin/env python3
"""
General-purpose HTTP server for maru-hleda-byt.

Serves static files from DATA_DIR and additionally handles:
    GET  /scrapers-status      → SSR scraper status page
    GET  /api/ratings          → ratings.json contents
    POST /api/ratings          → save entire ratings object
    GET  /api/ratings/export   → same as GET, with download header
    GET  /api/status           → status.json contents (JSON)
    GET  /api/status/history   → scraper_history.json contents (JSON)
"""
from __future__ import annotations

import functools
import json
import logging
import os
import sys
from datetime import datetime
from http.server import HTTPServer, SimpleHTTPRequestHandler
from pathlib import Path

PORT = int(os.environ.get("SERVER_PORT", 8080))
DATA_DIR = Path(os.environ.get("DATA_DIR", "."))
RATINGS_FILE = DATA_DIR / "ratings.json"

_LOG_LEVEL = getattr(logging, os.environ.get("LOG_LEVEL", "INFO").upper(), logging.INFO)
logging.basicConfig(
    level=_LOG_LEVEL,
    format="%(asctime)s [server] %(levelname)s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
log = logging.getLogger(__name__)

# ── Helpers ──────────────────────────────────────────────────────────────────

COLORS = {
    "sreality": "#1976D2",
    "realingo": "#7B1FA2",
    "bezrealitky": "#E65100",
    "idnes": "#C62828",
    "psn": "#2E7D32",
    "cityhome": "#00838F",
}

MONTHS_CZ = [
    "ledna", "února", "března", "dubna", "května", "června",
    "července", "srpna", "září", "října", "listopadu", "prosince",
]


def _load_json(path: Path, default=None):
    """Read and parse JSON file; return default on missing or parse error."""
    log.debug("_load_json: %s", path.resolve())
    try:
        if path.exists():
            return json.loads(path.read_text(encoding="utf-8"))
    except Exception as e:
        log.warning("Failed to load %s: %s", path, e)
    return default


def _fmt_date(iso_str: str) -> str:
    """Format ISO timestamp as Czech date string."""
    try:
        d = datetime.fromisoformat(iso_str)
        return f"{d.day}. {MONTHS_CZ[d.month - 1]} {d.year}, {d.hour:02d}:{d.minute:02d}"
    except Exception:
        return iso_str


def load_ratings() -> dict:
    return _load_json(RATINGS_FILE, default={})


def save_ratings(data: dict) -> None:
    RATINGS_FILE.write_text(
        json.dumps(data, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
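The Czech date formatting done by `_fmt_date` can be illustrated standalone. The sketch below mirrors the helper above (day and genitive month name, zero-padded time); the sample timestamp is invented:

```python
# Standalone mirror of server.py's _fmt_date: Czech dates use the genitive
# month name and an unpadded day, with HH:MM zero-padded.
from datetime import datetime

MONTHS_CZ = ["ledna", "února", "března", "dubna", "května", "června",
             "července", "srpna", "září", "října", "listopadu", "prosince"]

def fmt_date(iso_str: str) -> str:
    try:
        d = datetime.fromisoformat(iso_str)
        return f"{d.day}. {MONTHS_CZ[d.month - 1]} {d.year}, {d.hour:02d}:{d.minute:02d}"
    except ValueError:
        return iso_str  # fall back to the raw string on unparsable input

print(fmt_date("2026-02-27T10:36:37"))  # → 27. února 2026, 10:36
```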
# ── SSR status page ──────────────────────────────────────────────────────────
_CSS = """\
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: system-ui, -apple-system, sans-serif;
background: #f5f5f5; color: #333;
padding: 24px; max-width: 640px; margin: 0 auto;
}
h1 { font-size: 22px; margin-bottom: 4px; }
.subtitle { color: #888; font-size: 13px; margin-bottom: 24px; }
.card {
background: white; border-radius: 12px; padding: 20px;
box-shadow: 0 1px 4px rgba(0,0,0,0.08); margin-bottom: 16px;
}
.card h2 { font-size: 15px; margin-bottom: 12px; color: #555; }
.timestamp { font-size: 28px; font-weight: 700; color: #1976D2; }
.timestamp-sub { font-size: 13px; color: #999; margin-top: 2px; }
.summary-row {
display: flex; justify-content: space-between; align-items: center;
padding: 10px 0; border-bottom: 1px solid #f0f0f0;
}
.summary-row:last-child { border-bottom: none; }
.summary-label { font-size: 13px; color: #666; }
.summary-value { font-size: 18px; font-weight: 700; }
.badge {
display: inline-block; padding: 2px 8px; border-radius: 4px;
font-size: 11px; font-weight: 600; color: white;
}
.badge-ok { background: #4CAF50; }
.badge-err { background: #F44336; }
.badge-skip { background: #FF9800; }
.bar-row { display: flex; align-items: center; gap: 8px; margin: 4px 0; }
.bar-track { flex: 1; height: 20px; background: #f0f0f0; border-radius: 4px; overflow: hidden; }
.bar-fill { height: 100%; border-radius: 4px; }
.bar-count { font-size: 12px; width: 36px; font-variant-numeric: tabular-nums; }
.loader-wrap {
display: flex; flex-direction: column; align-items: center;
justify-content: center; padding: 60px 0;
}
.spinner {
width: 40px; height: 40px; border: 4px solid #e0e0e0;
border-top-color: #1976D2; border-radius: 50%;
animation: spin 0.8s linear infinite;
}
@keyframes spin { to { transform: rotate(360deg); } }
.loader-text { margin-top: 16px; color: #999; font-size: 14px; }
.link-row { text-align: center; margin-top: 8px; }
.link-row a { color: #1976D2; text-decoration: none; font-size: 14px; }
.history-table { width: 100%; border-collapse: collapse; font-size: 12px; }
.history-table th {
text-align: left; font-weight: 600; color: #999; font-size: 11px;
padding: 4px 6px 8px 6px; border-bottom: 2px solid #f0f0f0;
}
.history-table td { padding: 7px 6px; border-bottom: 1px solid #f5f5f5; vertical-align: middle; }
.history-table tr:last-child td { border-bottom: none; }
.history-table tr.latest td { background: #f8fbff; font-weight: 600; }
.src-nums { display: flex; gap: 4px; flex-wrap: wrap; }
.src-chip {
display: inline-block; padding: 1px 5px; border-radius: 3px;
font-size: 10px; color: white; font-variant-numeric: tabular-nums;
}
.clickable-row { cursor: pointer; }
.clickable-row:hover td { background: #f0f7ff !important; }
/* Modal */
#md-overlay {
position: fixed; inset: 0; background: rgba(0,0,0,0.45);
display: flex; align-items: flex-start; justify-content: center;
z-index: 1000; padding: 40px 16px; overflow-y: auto;
}
#md-box {
background: white; border-radius: 12px; padding: 24px;
width: 100%; max-width: 620px; position: relative;
box-shadow: 0 8px 32px rgba(0,0,0,0.24); margin: auto;
}
#md-close {
position: absolute; top: 10px; right: 14px;
background: none; border: none; font-size: 26px; cursor: pointer;
color: #aaa; line-height: 1;
}
#md-close:hover { color: #333; }
#md-box h3 { font-size: 15px; margin-bottom: 14px; padding-right: 24px; }
.md-summary { display: flex; gap: 20px; flex-wrap: wrap; font-size: 13px; margin-bottom: 16px; color: #555; }
.md-summary b { color: #333; }
.detail-table { width: 100%; border-collapse: collapse; font-size: 12px; }
.detail-table th {
text-align: left; color: #999; font-size: 11px; font-weight: 600;
padding: 4px 8px 6px 0; border-bottom: 2px solid #f0f0f0; white-space: nowrap;
}
.detail-table td { padding: 6px 8px 6px 0; border-bottom: 1px solid #f5f5f5; vertical-align: top; }
.detail-table tr:last-child td { border-bottom: none; }
"""
_SOURCE_ORDER = ["Sreality", "Realingo", "Bezrealitky", "iDNES", "PSN", "CityHome"]
_SOURCE_ABBR = ["Sre", "Rea", "Bez", "iDN", "PSN", "CH"]


def _sources_html(sources: list) -> str:
    if not sources:
        return ""
    max_count = max((s.get("accepted", 0) for s in sources), default=1) or 1
    parts = ['<div class="card"><h2>Zdroje</h2>']
    for s in sources:
        name = s.get("name", "?")
        accepted = s.get("accepted", 0)
        error = s.get("error")
        exc = s.get("excluded", {})
        excluded_total = sum(exc.values()) if isinstance(exc, dict) else s.get("excluded_total", 0)
        color = COLORS.get(name.lower(), "#999")
        pct = round(accepted / max_count * 100) if max_count else 0
        if error:
            badge = '<span class="badge badge-err">chyba</span>'
        elif accepted == 0:
            badge = '<span class="badge badge-skip">0</span>'
        else:
            badge = '<span class="badge badge-ok">OK</span>'
        parts.append(
            f'<div style="margin-bottom:12px;">'
            f'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">'
            f'<span style="font-weight:600;font-size:14px;">{name} {badge}</span>'
            f'<span style="font-size:12px;color:#999;">{excluded_total} vyloučených</span>'
            f'</div>'
            f'<div class="bar-row">'
            f'<div class="bar-track"><div class="bar-fill" style="width:{pct}%;background:{color};"></div></div>'
            f'<span class="bar-count">{accepted}</span>'
            f'</div></div>'
        )
    parts.append("</div>")
    return "".join(parts)
def _history_html(history: list) -> str:
    if not history:
        return ""
    rows = list(reversed(history))
    parts = [
        '<div class="card">'
        '<h2>Historie běhů <span style="font-size:11px;font-weight:400;color:#bbb;"> klikni pro detaily</span></h2>',
        '<table class="history-table"><thead><tr>',
        '<th>Datum</th><th>Trvání</th><th>Přijato&nbsp;/&nbsp;Dedup</th><th>Zdroje</th><th>OK</th>',
        '</tr></thead><tbody>',
    ]
    for i, entry in enumerate(rows):
        row_class = ' class="latest clickable-row"' if i == 0 else ' class="clickable-row"'
        src_map = {s["name"]: s for s in entry.get("sources", []) if "name" in s}
        chips = "".join(
            f'<span class="src-chip" style="background:{"#F44336" if (src_map.get(name) or {}).get("error") else COLORS.get(name.lower(), "#999")}" title="{name}">'
            f'{abbr}&nbsp;{src_map[name].get("accepted", 0) if name in src_map else "-"}</span>'
            for name, abbr in zip(_SOURCE_ORDER, _SOURCE_ABBR)
        )
        ok_badge = (
            '<span class="badge badge-err">chyba</span>'
            if entry.get("success") is False
            else '<span class="badge badge-ok">OK</span>'
        )
        dur = f'{entry["duration_sec"]}s' if entry.get("duration_sec") is not None else "-"
        parts.append(
            f'<tr{row_class} data-idx="{i}">'
            f'<td>{_fmt_date(entry.get("timestamp", ""))}</td>'
            f'<td>{dur}</td>'
            f'<td>{entry.get("total_accepted", "-")}&nbsp;/&nbsp;{entry.get("deduplicated", "-")}</td>'
            f'<td><div class="src-nums">{chips}</div></td>'
            f'<td>{ok_badge}</td>'
            f'</tr>'
        )
    parts.append("</tbody></table></div>")
    return "".join(parts)
def _modal_script(rows_json: str) -> str:
    """Return the modal overlay HTML + JS for the history detail popup."""
    return (
        '<div id="md-overlay" style="display:none">'
        '<div id="md-box"><button id="md-close">&times;</button>'
        '<div id="md-body"></div></div></div>\n'
        '<script>\n(function(){\n'
        f'var H={rows_json};\n'
        'var C={"sreality":"#1976D2","realingo":"#7B1FA2","bezrealitky":"#E65100","idnes":"#C62828","psn":"#2E7D32","cityhome":"#00838F"};\n'
        'var MN=["ledna","února","března","dubna","května","června","července","srpna","září","října","listopadu","prosince"];\n'
        'function fd(s){var d=new Date(s);return d.getDate()+". "+MN[d.getMonth()]+" "+d.getFullYear()+", "+String(d.getHours()).padStart(2,"0")+":"+String(d.getMinutes()).padStart(2,"0");}\n'
        'function openModal(idx){\n'
        ' var e=H[idx],src=e.sources||[];\n'
        ' var h="<h3>Detaily b\u011bhu \u2013 "+fd(e.timestamp)+"</h3>";\n'
        ' h+="<div class=\\"md-summary\\">";\n'
        ' if(e.duration_sec!=null) h+="<span><b>Trvání:</b> "+e.duration_sec+"s</span>";\n'
        ' if(e.total_accepted!=null) h+="<span><b>Přijato:</b> "+e.total_accepted+"</span>";\n'
        ' if(e.deduplicated!=null) h+="<span><b>Po dedup:</b> "+e.deduplicated+"</span>";\n'
        ' h+="</div>";\n'
        ' h+="<table class=\\"detail-table\\"><thead><tr>";\n'
        ' h+="<th>Zdroj</th><th>Přijato</th><th>Staženo</th><th>Stránky</th><th>Cache</th><th>Vyloučeno</th><th>Čas</th><th>OK</th>";\n'
        ' h+="</tr></thead><tbody>";\n'
        ' src.forEach(function(s){\n'
        '  var nm=s.name||"?",col=C[nm.toLowerCase()]||"#999";\n'
        '  var exc=s.excluded||{};\n'
        '  var excStr=Object.entries(exc).filter(function(kv){return kv[1]>0;}).map(function(kv){return kv[0]+":&nbsp;"+kv[1];}).join(", ")||"\u2013";\n'
        '  var ok=s.error?"<span class=\\"badge badge-err\\" title=\\""+s.error+"\\">chyba</span>":"<span class=\\"badge badge-ok\\">OK</span>";\n'
        '  var dot="<span style=\\"display:inline-block;width:8px;height:8px;border-radius:50%;background:"+col+";margin-right:5px;\\"></span>";\n'
        '  h+="<tr>";\n'
        '  h+="<td>"+dot+nm+"</td>";\n'
        '  h+="<td>"+(s.accepted!=null?s.accepted:"\u2013")+"</td>";\n'
        '  h+="<td>"+(s.fetched!=null?s.fetched:"\u2013")+"</td>";\n'
        '  h+="<td>"+(s.pages!=null?s.pages:"\u2013")+"</td>";\n'
        '  h+="<td>"+(s.cache_hits!=null?s.cache_hits:"\u2013")+"</td>";\n'
        '  h+="<td style=\\"font-size:11px;color:#666;\\">"+excStr+"</td>";\n'
        '  h+="<td>"+(s.duration_sec!=null?s.duration_sec+"s":"\u2013")+"</td>";\n'
        '  h+="<td>"+ok+"</td></tr>";\n'
        ' });\n'
        ' h+="</tbody></table>";\n'
        ' document.getElementById("md-body").innerHTML=h;\n'
        ' document.getElementById("md-overlay").style.display="flex";\n'
        '}\n'
        'function closeModal(){document.getElementById("md-overlay").style.display="none";}\n'
        'var tb=document.querySelector(".history-table tbody");\n'
        'if(tb)tb.addEventListener("click",function(e){var tr=e.target.closest("tr[data-idx]");if(tr)openModal(parseInt(tr.dataset.idx,10));});\n'
        'document.getElementById("md-close").addEventListener("click",closeModal);\n'
        'document.getElementById("md-overlay").addEventListener("click",function(e){if(e.target===this)closeModal();});\n'
        'document.addEventListener("keydown",function(e){if(e.key==="Escape")closeModal();});\n'
        '})();\n</script>'
    )
def _render_status_html(status: dict | None, history: list, is_running: bool = False) -> str:
    """Generate the complete HTML page for /scrapers-status."""
    head_open = (
        '<!DOCTYPE html>\n<html lang="cs">\n<head>\n'
        '<meta charset="UTF-8">\n'
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
        f'<title>Scraper status</title>\n<style>{_CSS}</style>\n'
    )
    page_header = '<h1>Scraper status</h1>\n<div class="subtitle">maru-hleda-byt</div>\n'
    footer = '<div class="link-row"><a href="/mapa_bytu.html">Otevřít mapu</a></div>'

    if status is None:
        return (
            head_open + '</head>\n<body>\n' + page_header
            + '<div class="card"><p style="color:#F44336">Status není k dispozici.</p></div>\n'
            + footer + '\n</body>\n</html>'
        )

    if is_running:
        return (
            head_open
            + '<meta http-equiv="refresh" content="30">\n'
            + '</head>\n<body>\n' + page_header
            + '<div class="loader-wrap"><div class="spinner"></div>'
            + '<div class="loader-text">Scraper právě běží…</div></div>\n'
            + footer + '\n</body>\n</html>'
        )

    # ── Done state ────────────────────────────────────────────────────────────
    ts = status.get("timestamp", "")
    duration = status.get("duration_sec")
    total_accepted = status.get("total_accepted", 0)
    deduplicated = status.get("deduplicated")

    ts_card = (
        '<div class="card"><h2>Poslední scrape</h2>'
        f'<div class="timestamp">{_fmt_date(ts)}</div>'
        + (f'<div class="timestamp-sub">Trvání: {round(duration)}s</div>' if duration is not None else "")
        + '</div>'
    )
    sum_card = (
        '<div class="card"><h2>Souhrn</h2>'
        f'<div class="summary-row"><span class="summary-label">Vyhovujících bytů</span>'
        f'<span class="summary-value" style="color:#4CAF50">{total_accepted}</span></div>'
        + (
            f'<div class="summary-row"><span class="summary-label">Po deduplikaci (v mapě)</span>'
            f'<span class="summary-value" style="color:#1976D2">{deduplicated}</span></div>'
            if deduplicated is not None else ""
        )
        + '</div>'
    )
    rows_for_js = list(reversed(history))
    body = (
        page_header
        + ts_card + "\n"
        + sum_card + "\n"
        + _sources_html(status.get("sources", [])) + "\n"
        + _history_html(history) + "\n"
        + footer
    )
    modal = _modal_script(json.dumps(rows_for_js, ensure_ascii=False))
    return head_open + '</head>\n<body>\n' + body + '\n' + modal + '\n</body>\n</html>'
# ── HTTP handler ──────────────────────────────────────────────────────────────

class Handler(SimpleHTTPRequestHandler):
    def log_message(self, format, *args):
        pass  # suppress default access log; use our own where needed

    def _send_json(self, status: int, body, extra_headers=None):
        payload = json.dumps(body, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        if extra_headers:
            for k, v in extra_headers.items():
                self.send_header(k, v)
        self.end_headers()
        self.wfile.write(payload)

    def do_OPTIONS(self):
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def do_GET(self):
        if self.path.startswith("/api/"):
            self._handle_api_get()
        elif self.path.rstrip("/") == "/scrapers-status":
            self._serve_status_page()
        else:
            log.debug("GET %s → static file: %s", self.path, self.translate_path(self.path))
            super().do_GET()
    def _handle_api_get(self):
        if self.path in ("/api/ratings", "/api/ratings/export"):
            ratings = load_ratings()
            extra = None
            if self.path == "/api/ratings/export":
                extra = {"Content-Disposition": 'attachment; filename="ratings.json"'}
            log.info("GET %s → %d ratings", self.path, len(ratings))
            self._send_json(200, ratings, extra)
        elif self.path == "/api/status":
            data = _load_json(DATA_DIR / "status.json")
            if data is None:
                self._send_json(404, {"error": "status not available"})
                return
            log.info("GET /api/status → ok")
            self._send_json(200, data)
        elif self.path == "/api/status/history":
            data = _load_json(DATA_DIR / "scraper_history.json", default=[])
            if not isinstance(data, list):
                data = []
            log.info("GET /api/status/history → %d entries", len(data))
            self._send_json(200, data)
        else:
            self._send_json(404, {"error": "not found"})
    def _serve_status_page(self):
        status = _load_json(DATA_DIR / "status.json")
        history = _load_json(DATA_DIR / "scraper_history.json", default=[])
        if not isinstance(history, list):
            history = []
        is_running = (DATA_DIR / "scraper_running.json").exists()
        html = _render_status_html(status, history, is_running)
        payload = html.encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def do_POST(self):
        if self.path == "/api/ratings":
            length = int(self.headers.get("Content-Length", 0))
            if length == 0:
                self._send_json(400, {"error": "empty body"})
                return
            try:
                raw = self.rfile.read(length)
                data = json.loads(raw.decode("utf-8"))
            except Exception as e:
                log.warning("Bad request body: %s", e)
                self._send_json(400, {"error": "invalid JSON"})
                return
            if not isinstance(data, dict):
                self._send_json(400, {"error": "expected JSON object"})
                return
            save_ratings(data)
            log.info("POST /api/ratings → saved %d ratings", len(data))
            self._send_json(200, {"ok": True, "count": len(data)})
        else:
            self._send_json(404, {"error": "not found"})
if __name__ == "__main__":
    log.info("Server starting on port %d, data dir: %s", PORT, DATA_DIR)
    handler = functools.partial(Handler, directory=str(DATA_DIR))
    server = HTTPServer(("0.0.0.0", PORT), handler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        log.info("Stopped.")
        sys.exit(0)
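A minimal client for the ratings endpoints might look like the sketch below, using only stdlib `urllib`. The endpoint paths and the `{"ok": true, "count": N}` response shape come from server.py above; the base URL is an assumption about where the server is deployed:

```python
# Hypothetical client helpers for server.py's /api/ratings endpoints.
import json
from urllib.request import Request, urlopen

def fetch_ratings(base_url: str) -> dict:
    """GET /api/ratings and parse the JSON object of {hash_id: rating}."""
    with urlopen(f"{base_url}/api/ratings") as resp:
        return json.loads(resp.read().decode("utf-8"))

def save_ratings_remote(base_url: str, ratings: dict) -> dict:
    """POST the whole ratings object; server replies {"ok": true, "count": N}."""
    body = json.dumps(ratings, ensure_ascii=False).encode("utf-8")
    req = Request(f"{base_url}/api/ratings", data=body,
                  headers={"Content-Type": "application/json"}, method="POST")
    with urlopen(req) as resp:
        return json.loads(resp.read().decode("utf-8"))

# Example against a running instance (URL is an assumption):
#   merged = fetch_ratings("http://localhost:8080")
#   merged["sreality_123"] = 5          # namespaced {source}_{id} key
#   save_ratings_remote("http://localhost:8080", merged)
```

Because POST replaces the whole object, a client should GET first and merge its local changes in (as the map page does with localStorage) before saving.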