Compare commits
7 Commits
test-image
...
0.03
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c2bc3f452f | ||
| b8d4d44164 | |||
|
|
c6089f0da9 | ||
|
|
327688d9d2 | ||
|
|
09a853aa05 | ||
|
|
5207c48890 | ||
| 215b51aadb |
31
.claude/settings.local.json
Normal file
31
.claude/settings.local.json
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"WebFetch(domain:github.com)",
|
||||||
|
"WebFetch(domain:www.sreality.cz)",
|
||||||
|
"WebFetch(domain:webscraping.pro)",
|
||||||
|
"WebFetch(domain:raw.githubusercontent.com)",
|
||||||
|
"Bash(python3:*)",
|
||||||
|
"Bash(open:*)",
|
||||||
|
"WebFetch(domain:www.realingo.cz)",
|
||||||
|
"WebFetch(domain:api.realingo.cz)",
|
||||||
|
"Bash(curl:*)",
|
||||||
|
"Bash(grep:*)",
|
||||||
|
"WebFetch(domain:www.realitni-pes.cz)",
|
||||||
|
"WebFetch(domain:www.bezrealitky.cz)",
|
||||||
|
"WebFetch(domain:apify.com)",
|
||||||
|
"WebFetch(domain:www.bezrealitky.com)",
|
||||||
|
"WebFetch(domain:reality.idnes.cz)",
|
||||||
|
"Bash(# Final checks: robots.txt and response time for rate limiting clues curl -s -L -H \"\"User-Agent: Mozilla/5.0 \\(Windows NT 10.0; Win64; x64\\) AppleWebKit/537.36 \\(KHTML, like Gecko\\) Chrome/120.0.0.0 Safari/537.36\"\" \"\"https://reality.idnes.cz/robots.txt\"\")",
|
||||||
|
"WebFetch(domain:www.cityhome.cz)",
|
||||||
|
"WebFetch(domain:www.psn.cz)",
|
||||||
|
"WebFetch(domain:www.city-home.cz)",
|
||||||
|
"WebFetch(domain:psn.cz)",
|
||||||
|
"WebFetch(domain:api.psn.cz)",
|
||||||
|
"Bash(done)",
|
||||||
|
"Bash(# Final summary: count total units across all projects\n# Get the total count from the unitsCountData we already extracted\necho \"\"From unitsCountData on /prodej page:\"\"\necho \"\" type_id 0 \\(Prodej bytů a ateliérů\\): 146\"\"\necho \"\" type_id 1 \\(Prodej komerčních nemovitostí\\): 14\"\"\necho \"\" type_id 2 \\(Pronájem bytů\\): 3\"\"\necho \"\" type_id 3 \\(Pronájem komerčních nemovitostí\\): 48\"\"\necho \"\"\"\"\necho \"\"Total for-sale projects: 19\"\"\necho \"\"\"\"\necho \"\"Disposition counts from the data:\"\"\npython3 << 'PYEOF'\n# Extract disposition counts from prodej page\nimport re\n\nwith open\\('/tmp/psn_prodej_p1.html', 'r', encoding='utf-8'\\) as f:\n html = f.read\\(\\)\n\n# Find disposition data\nidx = html.find\\('\\\\\\\\\"disposition\\\\\\\\\":['\\)\nif idx >= 0:\n chunk = html[idx:idx+2000].replace\\('\\\\\\\\\"', '\"'\\)\n # Extract name and count pairs\n import re\n pairs = re.findall\\(r'\"name\":\"\\([^\"]+\\)\",\"count\":\\(\\\\d+\\)', chunk\\)\n for name, count in pairs:\n print\\(f\" {name}: {count}\"\\)\nPYEOF)",
|
||||||
|
"Bash(ls:*)",
|
||||||
|
"Bash(chmod:*)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
35
.gitea/workflows/build.yaml
Normal file
35
.gitea/workflows/build.yaml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
name: Build and Push
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Image tag'
|
||||||
|
required: true
|
||||||
|
default: 'latest'
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- '*'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Login to Gitea registry
|
||||||
|
run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login -u ${{ github.actor }} --password-stdin gitea.home.hrajfrisbee.cz
|
||||||
|
|
||||||
|
- name: Build and push
|
||||||
|
run: |
|
||||||
|
TAG=${{ github.ref_name }}
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
TAG=${{ inputs.tag }}
|
||||||
|
fi
|
||||||
|
IMAGE=gitea.home.hrajfrisbee.cz/${{ github.repository }}:$TAG
|
||||||
|
docker build -f build/Dockerfile -t $IMAGE .
|
||||||
|
docker push $IMAGE
|
||||||
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
.vscode/
|
||||||
|
__pycache__/
|
||||||
|
byty_*.json
|
||||||
79
Makefile
Normal file
79
Makefile
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
IMAGE_NAME := maru-hleda-byt
|
||||||
|
CONTAINER_NAME := maru-hleda-byt
|
||||||
|
VOLUME_NAME := maru-hleda-byt-data
|
||||||
|
VALIDATION_CONTAINER := maru-hleda-byt-validation
|
||||||
|
VALIDATION_VOLUME := maru-hleda-byt-validation-data
|
||||||
|
PORT := 8080
|
||||||
|
|
||||||
|
.PHONY: build run stop logs scrape restart clean help validation validation-local validation-stop validation-local-debug serve serve-debug
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "Available targets:"
|
||||||
|
@echo " build - Build the Docker image"
|
||||||
|
@echo " run - Build and run the Docker container in the background"
|
||||||
|
@echo " stop - Stop and remove the running container"
|
||||||
|
@echo " logs - Show live container logs"
|
||||||
|
@echo " scrape - Run the scraping script inside the container"
|
||||||
|
@echo " validation - Run scraping with limits (1 page, 10 properties) in Docker container"
|
||||||
|
@echo " validation-stop - Stop the validation Docker container"
|
||||||
|
@echo " validation-local - Run scraping with limits (1 page, 10 properties) locally with Python"
|
||||||
|
@echo " validation-local-debug - Run validation locally with DEBUG logging"
|
||||||
|
@echo " restart - Restart the container (stop and run again)"
|
||||||
|
@echo " clean - Stop container and remove the Docker image"
|
||||||
|
@echo " serve - Run server.py locally (DATA_DIR=., port $(PORT))"
|
||||||
|
@echo " serve-debug - Run server.py locally with DEBUG logging"
|
||||||
|
@echo " help - Show this help message"
|
||||||
|
|
||||||
|
build:
|
||||||
|
docker build -f build/Dockerfile -t $(IMAGE_NAME) .
|
||||||
|
|
||||||
|
run: build
|
||||||
|
docker run -d --name $(CONTAINER_NAME) \
|
||||||
|
-p $(PORT):8080 \
|
||||||
|
-v $(VOLUME_NAME):/app/data \
|
||||||
|
--restart unless-stopped \
|
||||||
|
$(IMAGE_NAME)
|
||||||
|
@echo "Map will be at http://localhost:$(PORT)/mapa_bytu.html"
|
||||||
|
|
||||||
|
stop:
|
||||||
|
docker stop $(CONTAINER_NAME) && docker rm $(CONTAINER_NAME)
|
||||||
|
|
||||||
|
logs:
|
||||||
|
docker logs -f $(CONTAINER_NAME)
|
||||||
|
|
||||||
|
scrape:
|
||||||
|
docker exec $(CONTAINER_NAME) bash /app/run_all.sh
|
||||||
|
|
||||||
|
validation: build
|
||||||
|
@docker stop $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||||
|
@docker rm $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||||
|
docker run -d --name $(VALIDATION_CONTAINER) \
|
||||||
|
-p 8081:8080 \
|
||||||
|
-v $(VALIDATION_VOLUME):/app/data \
|
||||||
|
--restart unless-stopped \
|
||||||
|
$(IMAGE_NAME)
|
||||||
|
@sleep 2
|
||||||
|
docker exec $(VALIDATION_CONTAINER) bash /app/run_all.sh --max-pages 1 --max-properties 10
|
||||||
|
@echo "Validation map will be at http://localhost:8081/mapa_bytu.html"
|
||||||
|
|
||||||
|
validation-stop:
|
||||||
|
@docker stop $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||||
|
@docker rm $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||||
|
@echo "Validation container stopped and removed"
|
||||||
|
|
||||||
|
validation-local:
|
||||||
|
./run_all.sh --max-pages 1 --max-properties 10
|
||||||
|
|
||||||
|
validation-local-debug:
|
||||||
|
./run_all.sh --max-pages 1 --max-properties 10 --log-level DEBUG
|
||||||
|
|
||||||
|
serve:
|
||||||
|
DATA_DIR=. PORT=$(PORT) python server.py
|
||||||
|
|
||||||
|
serve-debug:
|
||||||
|
DATA_DIR=. PORT=$(PORT) python server.py --verbose
|
||||||
|
|
||||||
|
restart: stop run
|
||||||
|
|
||||||
|
clean: stop
|
||||||
|
docker rmi $(IMAGE_NAME)
|
||||||
244
README.md
Normal file
244
README.md
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
# Maru hleda byt
|
||||||
|
|
||||||
|
Apartment search aggregator for Prague. Scrapes listings from 6 Czech real estate portals, filters them by configurable criteria, deduplicates across sources, and generates a single interactive map with all matching apartments.
|
||||||
|
|
||||||
|
Built for a specific use case: finding a 3+kk or larger apartment in Prague, excluding panel construction ("panelak") and housing estates ("sidliste"), with personal rating support.
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ run_all.sh │
|
||||||
|
│ Orchestrates all scrapers, then merges results into map │
|
||||||
|
├─────────┬──────────┬──────────┬────────┬────────┬───────────┤
|
||||||
|
│Sreality │Realingo │Bezreal. │iDNES │PSN │CityHome │
|
||||||
|
│ (API) │ (HTML) │ (HTML) │ (HTML) │ (HTML) │ (HTML) │
|
||||||
|
├─────────┴──────────┴──────────┴────────┴────────┴───────────┤
|
||||||
|
│ merge_and_map.py │
|
||||||
|
│ Loads all byty_*.json, deduplicates, generates HTML map │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ mapa_bytu.html │
|
||||||
|
│ Interactive Leaflet.js map with filters & ratings │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pipeline
|
||||||
|
|
||||||
|
1. **Scraping** -- Each scraper independently fetches listings from its portal, applies filters, and saves results to a JSON file (`byty_<source>.json`).
|
||||||
|
2. **Merging** -- `merge_and_map.py` loads all 6 JSON files, deduplicates listings (by street name + price + area), and generates the final `mapa_bytu.html`.
|
||||||
|
3. **Serving** -- The HTML map can be opened locally as a file, or served via Docker with a built-in HTTP server.
|
||||||
|
|
||||||
|
### Execution order in `run_all.sh`
|
||||||
|
|
||||||
|
Scrapers run sequentially (to avoid overwhelming any single portal), except PSN and CityHome which run in parallel (different sites). If a scraper fails, it is logged but does not abort the pipeline -- remaining scrapers continue.
|
||||||
|
|
||||||
|
```
|
||||||
|
1. scrape_and_map.py (Sreality)
|
||||||
|
2. scrape_realingo.py (Realingo)
|
||||||
|
3. scrape_bezrealitky.py (Bezrealitky)
|
||||||
|
4. scrape_idnes.py (iDNES Reality)
|
||||||
|
5. scrape_psn.py + scrape_cityhome.py (parallel)
|
||||||
|
6. merge_and_map.py (merge + map generation)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scrapers
|
||||||
|
|
||||||
|
All scrapers share the same CLI interface and a consistent two-phase approach:
|
||||||
|
|
||||||
|
1. **Phase 1** -- Fetch listing pages (paginated) to get a list of all available apartments.
|
||||||
|
2. **Phase 2** -- Fetch detail pages for each listing to get floor, construction type, and other data needed for filtering.
|
||||||
|
|
||||||
|
Each scraper uses a **JSON file cache**: if a listing's `hash_id` and `price` haven't changed since the last run, the cached data is reused and the detail page is not re-fetched. This significantly reduces runtime on subsequent runs.
|
||||||
|
|
||||||
|
### Source details
|
||||||
|
|
||||||
|
| Scraper | Portal | Data source | Output file | Notes |
|
||||||
|
|---------|--------|-------------|-------------|-------|
|
||||||
|
| `scrape_and_map.py` | [Sreality.cz](https://sreality.cz) | REST API (JSON) | `byty_sreality.json` | Main scraper. Also contains `generate_map()` used by all other scripts. |
|
||||||
|
| `scrape_realingo.py` | [Realingo.cz](https://realingo.cz) | `__NEXT_DATA__` JSON in HTML | `byty_realingo.json` | Next.js app, data extracted from server-side props. |
|
||||||
|
| `scrape_bezrealitky.py` | [Bezrealitky.cz](https://bezrealitky.cz) | `__NEXT_DATA__` Apollo cache in HTML | `byty_bezrealitky.json` | Next.js app with Apollo GraphQL cache in page source. |
|
||||||
|
| `scrape_idnes.py` | [Reality iDNES](https://reality.idnes.cz) | HTML parsing (regex) | `byty_idnes.json` | Traditional HTML site. GPS extracted from `dataLayer.push()`. Retry logic with 5 attempts and exponential backoff. |
|
||||||
|
| `scrape_psn.py` | [PSN.cz](https://psn.cz) | RSC (React Server Components) escaped JSON in HTML | `byty_psn.json` | Uses `curl` instead of `urllib` due to Cloudflare SSL issues. Hardcoded list of Prague projects with GPS coordinates. |
|
||||||
|
| `scrape_cityhome.py` | [city-home.cz](https://city-home.cz) | HTML table parsing (data attributes on `<tr>`) | `byty_cityhome.json` | CityHome/SATPO developer projects. GPS fetched from project locality pages. |
|
||||||
|
|
||||||
|
### Scraper filter criteria
|
||||||
|
|
||||||
|
All scrapers apply the same core filters (with minor per-source variations):
|
||||||
|
|
||||||
|
| Filter | Value | Notes |
|
||||||
|
|--------|-------|-------|
|
||||||
|
| **Max price** | 13 500 000 CZK | PSN and CityHome use 14 000 000 CZK |
|
||||||
|
| **Min area** | 69 m^2 | |
|
||||||
|
| **Min floor** | 2. NP (2nd floor) | 2nd floor apartments are included but flagged on the map |
|
||||||
|
| **Dispositions** | 3+kk, 3+1, 4+kk, 4+1, 5+kk, 5+1, 6+ | |
|
||||||
|
| **Region** | Praha | |
|
||||||
|
| **Construction** | Excludes panel ("panelak") | |
|
||||||
|
| **Location** | Excludes housing estates ("sidliste") | |
|
||||||
|
|
||||||
|
## Utility scripts
|
||||||
|
|
||||||
|
### `merge_and_map.py`
|
||||||
|
|
||||||
|
Merges all `byty_*.json` files into `byty_merged.json` and generates `mapa_bytu.html`.
|
||||||
|
|
||||||
|
**Deduplication logic:** Two listings are considered duplicates if they share the same normalized street name + price + area. PSN and CityHome have priority during dedup (loaded first), so their listings are kept over duplicates from other portals.
|
||||||
|
|
||||||
|
### `regen_map.py`
|
||||||
|
|
||||||
|
Regenerates the map from existing `byty_sreality.json` data without re-scraping. Fetches missing area values from the Sreality API, fixes URLs, and re-applies the area filter. Useful for tweaking map output after data has already been collected.
|
||||||
|
|
||||||
|
## Interactive map (`mapa_bytu.html`)
|
||||||
|
|
||||||
|
The generated map is a standalone HTML file using Leaflet.js with CARTO basemap tiles. Features:
|
||||||
|
|
||||||
|
- **Color-coded markers** by disposition (3+kk = blue, 3+1 = green, 4+kk = orange, etc.)
|
||||||
|
- **Heart-shaped markers** for PSN and CityHome listings (developer favorites)
|
||||||
|
- **Source badge** in each popup (Sreality, Realingo, Bezrealitky, iDNES, PSN, CityHome)
|
||||||
|
- **Client-side filters:** minimum floor, maximum price, hide rejected
|
||||||
|
- **Rating system** (persisted in `localStorage`):
|
||||||
|
- Star -- mark as favorite (enlarged marker with pulsing glow)
|
||||||
|
- Reject -- dim the marker, optionally hide it
|
||||||
|
- Notes -- free-text notes per listing
|
||||||
|
- **2nd floor warning** -- listings on 2. NP show an orange warning in the popup
|
||||||
|
- **Statistics panel** -- total count, price range, average price, disposition breakdown
|
||||||
|
|
||||||
|
## CLI arguments
|
||||||
|
|
||||||
|
All scrapers accept the same arguments. When run via `run_all.sh`, these arguments are forwarded to every scraper.
|
||||||
|
|
||||||
|
```
|
||||||
|
--max-pages N Maximum number of listing pages to scrape per source.
|
||||||
|
Limits the breadth of the initial listing fetch.
|
||||||
|
(For PSN: max pages per project)
|
||||||
|
|
||||||
|
--max-properties N Maximum number of properties to fetch details for per source.
|
||||||
|
Limits the depth of the detail-fetching phase.
|
||||||
|
|
||||||
|
--log-level LEVEL Logging verbosity. One of: DEBUG, INFO, WARNING, ERROR.
|
||||||
|
Default: INFO.
|
||||||
|
DEBUG shows HTTP request/response details, filter decisions
|
||||||
|
for every single listing, and cache hit/miss info.
|
||||||
|
|
||||||
|
-h, --help Show help message (run_all.sh only).
|
||||||
|
```
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full scrape (all pages, all properties)
|
||||||
|
./run_all.sh
|
||||||
|
|
||||||
|
# Quick validation run (1 page per source, max 10 properties each)
|
||||||
|
./run_all.sh --max-pages 1 --max-properties 10
|
||||||
|
|
||||||
|
# Full scrape with debug logging
|
||||||
|
./run_all.sh --log-level DEBUG
|
||||||
|
|
||||||
|
# Run a single scraper
|
||||||
|
python3 scrape_bezrealitky.py --max-pages 2 --max-properties 5 --log-level DEBUG
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running with Docker
|
||||||
|
|
||||||
|
The project includes a Docker setup for unattended operation with a cron-based schedule.
|
||||||
|
|
||||||
|
### Container architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ Container (python:3.13-alpine) │
|
||||||
|
│ │
|
||||||
|
│ PID 1: python3 -m http.server :8080 │
|
||||||
|
│ serves /app/data/ │
|
||||||
|
│ │
|
||||||
|
│ crond: runs run_all.sh at 06:00/18:00 │
|
||||||
|
│ Europe/Prague timezone │
|
||||||
|
│ │
|
||||||
|
│ /app/ -- scripts (.py, .sh) │
|
||||||
|
│ /app/data/ -- volume (JSON + HTML) │
|
||||||
|
│ ^ symlinked from /app/byty_* │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
On startup, the HTTP server starts immediately. The initial scrape runs in the background. Subsequent cron runs update data in-place twice daily at 06:00 and 18:00 CET/CEST.
|
||||||
|
|
||||||
|
### Quick start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make run # Build image + start container on port 8080
|
||||||
|
# Map available at http://localhost:8080/mapa_bytu.html
|
||||||
|
```
|
||||||
|
|
||||||
|
### Makefile targets
|
||||||
|
|
||||||
|
| Target | Description |
|
||||||
|
|--------|-------------|
|
||||||
|
| `make help` | Show all available targets |
|
||||||
|
| `make build` | Build the Docker image |
|
||||||
|
| `make run` | Build and run the container (port 8080) |
|
||||||
|
| `make stop` | Stop and remove the container |
|
||||||
|
| `make logs` | Tail container logs |
|
||||||
|
| `make scrape` | Trigger a manual scrape inside the running container |
|
||||||
|
| `make restart` | Stop and re-run the container |
|
||||||
|
| `make clean` | Stop container and remove the Docker image |
|
||||||
|
| `make validation` | Run a limited scrape in a separate Docker container (port 8081) |
|
||||||
|
| `make validation-stop` | Stop the validation container |
|
||||||
|
| `make validation-local` | Run a limited scrape locally (1 page, 10 properties) |
|
||||||
|
| `make validation-local-debug` | Same as above with `--log-level DEBUG` |
|
||||||
|
|
||||||
|
### Validation mode
|
||||||
|
|
||||||
|
Validation targets run scrapers with `--max-pages 1 --max-properties 10` for a fast smoke test (~30 seconds instead of several minutes). The Docker validation target runs on port 8081 in a separate container so it doesn't interfere with production data.
|
||||||
|
|
||||||
|
## Project structure
|
||||||
|
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── scrape_and_map.py # Sreality scraper + map generator (generate_map())
|
||||||
|
├── scrape_realingo.py # Realingo scraper
|
||||||
|
├── scrape_bezrealitky.py # Bezrealitky scraper
|
||||||
|
├── scrape_idnes.py # iDNES Reality scraper
|
||||||
|
├── scrape_psn.py # PSN scraper
|
||||||
|
├── scrape_cityhome.py # CityHome scraper
|
||||||
|
├── merge_and_map.py # Merge all sources + generate final map
|
||||||
|
├── regen_map.py # Regenerate map from cached Sreality data
|
||||||
|
├── run_all.sh # Orchestrator script (runs all scrapers + merge)
|
||||||
|
├── mapa_bytu.html # Generated interactive map (output)
|
||||||
|
├── Makefile # Docker management + validation shortcuts
|
||||||
|
├── build/
|
||||||
|
│ ├── Dockerfile # Container image definition (python:3.13-alpine)
|
||||||
|
│ ├── entrypoint.sh # Container entrypoint (HTTP server + cron + initial scrape)
|
||||||
|
│ ├── crontab # Cron schedule (06:00 and 18:00 CET)
|
||||||
|
│ └── CONTAINER.md # Container-specific documentation
|
||||||
|
└── .gitignore # Ignores byty_*.json, __pycache__, .vscode
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
**None.** All scrapers use only the Python standard library (`urllib`, `json`, `re`, `argparse`, `logging`, `html.parser`). The only external tool required is `curl` (used by `scrape_psn.py` for Cloudflare TLS compatibility).
|
||||||
|
|
||||||
|
The Docker image is based on `python:3.13-alpine` (~70 MB) with `curl`, `bash`, and `tzdata` added.
|
||||||
|
|
||||||
|
## Caching behavior
|
||||||
|
|
||||||
|
Each scraper maintains a JSON file cache (`byty_<source>.json`). On each run:
|
||||||
|
|
||||||
|
1. The previous JSON file is loaded and indexed by `hash_id`.
|
||||||
|
2. For each listing found in the current run, if the `hash_id` exists in cache **and** the price is unchanged, the cached record is reused without fetching the detail page.
|
||||||
|
3. New or changed listings trigger a detail page fetch.
|
||||||
|
4. The JSON file is overwritten with the fresh results at the end.
|
||||||
|
|
||||||
|
This means the first run is slow (fetches every detail page with rate-limiting delays), but subsequent runs are much faster as they only fetch details for new or changed listings.
|
||||||
|
|
||||||
|
## Rate limiting
|
||||||
|
|
||||||
|
Each scraper includes polite delays between requests:
|
||||||
|
|
||||||
|
| Scraper | Delay between requests |
|
||||||
|
|---------|----------------------|
|
||||||
|
| Sreality | 0.3s (details), 0.5s (pages) |
|
||||||
|
| Realingo | 0.3s (details), 0.5s (pages) |
|
||||||
|
| Bezrealitky | 0.4s (details), 0.5s (pages) |
|
||||||
|
| iDNES | 0.4s (details), 1.0s (pages) + retry backoff (3/6/9/12s) |
|
||||||
|
| PSN | 0.5s (per project page) |
|
||||||
|
| CityHome | 0.5s (per project GPS fetch) |
|
||||||
5
build/.dockerignore
Normal file
5
build/.dockerignore
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
.git
|
||||||
|
mapa_bytu.html
|
||||||
|
byty_*.json
|
||||||
|
*.pyc
|
||||||
|
__pycache__
|
||||||
100
build/CONTAINER.md
Normal file
100
build/CONTAINER.md
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
# Container Setup
|
||||||
|
|
||||||
|
OCI container image for the apartment finder. Runs two processes:
|
||||||
|
|
||||||
|
1. **Web server** (`python3 -m http.server`) serving `mapa_bytu.html` on port 8080
|
||||||
|
2. **Cron job** running `run_all.sh` (all 6 scrapers + merge) every 12 hours
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ Container (python:3.13-alpine) │
|
||||||
|
│ │
|
||||||
|
│ PID 1: python3 -m http.server :8080 │
|
||||||
|
│ serves /app/data/ │
|
||||||
|
│ │
|
||||||
|
│ crond: runs run_all.sh at 06:00/18:00 │
|
||||||
|
│ Europe/Prague timezone │
|
||||||
|
│ │
|
||||||
|
│ /app/ ← scripts (.py, .sh) │
|
||||||
|
│ /app/data/ ← volume (JSON + HTML) │
|
||||||
|
│ ↑ symlinked from /app/byty_* │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
On startup, the web server starts immediately. The initial scrape runs in the background and populates data as it completes. Subsequent cron runs update the data in-place.
|
||||||
|
|
||||||
|
## Build and Run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the image
|
||||||
|
docker build -t maru-hleda-byt .
|
||||||
|
|
||||||
|
# Run with persistent data volume
|
||||||
|
docker run -d --name maru-hleda-byt \
|
||||||
|
-p 8080:8080 \
|
||||||
|
-v maru-hleda-byt-data:/app/data \
|
||||||
|
--restart unless-stopped \
|
||||||
|
maru-hleda-byt
|
||||||
|
```
|
||||||
|
|
||||||
|
Access the map at **http://localhost:8080/mapa_bytu.html**
|
||||||
|
|
||||||
|
## Volume Persistence
|
||||||
|
|
||||||
|
A named volume `maru-hleda-byt-data` stores:
|
||||||
|
|
||||||
|
- `byty_*.json` — cached scraper data (6 source files + 1 merged)
|
||||||
|
- `mapa_bytu.html` — the generated interactive map
|
||||||
|
|
||||||
|
The JSON cache is important: each scraper skips re-fetching properties that haven't changed. Without the volume, every container restart triggers a full re-scrape of all 6 portals (several minutes with rate limiting).
|
||||||
|
|
||||||
|
## Cron Schedule
|
||||||
|
|
||||||
|
Scrapers run at **06:00** and **18:00 Europe/Prague time** (CET/CEST).
|
||||||
|
|
||||||
|
Cron output is forwarded to the container's stdout/stderr, visible via `docker logs`.
|
||||||
|
|
||||||
|
## Operations
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View logs (including cron and scraper output)
|
||||||
|
docker logs -f maru-hleda-byt
|
||||||
|
|
||||||
|
# Check cron schedule
|
||||||
|
docker exec maru-hleda-byt crontab -l
|
||||||
|
|
||||||
|
# Trigger a manual scrape
|
||||||
|
docker exec maru-hleda-byt bash /app/run_all.sh
|
||||||
|
|
||||||
|
# Stop / start (data persists in volume)
|
||||||
|
docker stop maru-hleda-byt
|
||||||
|
docker start maru-hleda-byt
|
||||||
|
|
||||||
|
# Rebuild after code changes
|
||||||
|
docker stop maru-hleda-byt && docker rm maru-hleda-byt
|
||||||
|
docker build -t maru-hleda-byt .
|
||||||
|
docker run -d --name maru-hleda-byt \
|
||||||
|
-p 8080:8080 \
|
||||||
|
-v maru-hleda-byt-data:/app/data \
|
||||||
|
--restart unless-stopped \
|
||||||
|
maru-hleda-byt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
**Map shows 404**: The initial background scrape hasn't finished yet. Check `docker logs` for progress. First run takes a few minutes due to rate-limited API calls.
|
||||||
|
|
||||||
|
**SSL errors from PSN scraper**: The `scrape_psn.py` uses `curl` (not Python urllib) specifically for Cloudflare SSL compatibility. Alpine's curl includes modern TLS via OpenSSL, so this should work. If not, check that `ca-certificates` is installed (`apk add ca-certificates`).
|
||||||
|
|
||||||
|
**Health check failing**: The health check has a 5-minute start period to allow the initial scrape to complete. If it still fails, verify the HTTP server is running: `docker exec maru-hleda-byt wget -q -O /dev/null http://localhost:8080/`.
|
||||||
|
|
||||||
|
**Timezone verification**: `docker exec maru-hleda-byt date` should show Czech time.
|
||||||
|
|
||||||
|
## Image Details
|
||||||
|
|
||||||
|
- **Base**: `python:3.13-alpine` (~55 MB)
|
||||||
|
- **Added packages**: `curl`, `bash`, `tzdata` (~10 MB)
|
||||||
|
- **No pip packages** — all scrapers use Python standard library only
|
||||||
|
- **Approximate image size**: ~70 MB
|
||||||
28
build/Dockerfile
Normal file
28
build/Dockerfile
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
FROM python:3.13-alpine
|
||||||
|
|
||||||
|
RUN apk add --no-cache curl bash tzdata \
|
||||||
|
&& cp /usr/share/zoneinfo/Europe/Prague /etc/localtime \
|
||||||
|
&& echo "Europe/Prague" > /etc/timezone
|
||||||
|
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir flask
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY scrape_and_map.py scrape_realingo.py scrape_bezrealitky.py \
|
||||||
|
scrape_idnes.py scrape_psn.py scrape_cityhome.py \
|
||||||
|
merge_and_map.py regen_map.py run_all.sh server.py ./
|
||||||
|
|
||||||
|
COPY build/crontab /etc/crontabs/root
|
||||||
|
COPY build/entrypoint.sh /entrypoint.sh
|
||||||
|
RUN chmod +x /entrypoint.sh run_all.sh
|
||||||
|
|
||||||
|
RUN mkdir -p /app/data
|
||||||
|
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
HEALTHCHECK --interval=60s --timeout=5s --start-period=300s \
|
||||||
|
CMD wget -q -O /dev/null http://localhost:8080/ || exit 1
|
||||||
|
|
||||||
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
31
build/Makefile
Normal file
31
build/Makefile
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
IMAGE_NAME := maru-hleda-byt
|
||||||
|
CONTAINER_NAME := maru-hleda-byt
|
||||||
|
VOLUME_NAME := maru-hleda-byt-data
|
||||||
|
PORT := 8080
|
||||||
|
|
||||||
|
.PHONY: build run stop logs scrape restart clean
|
||||||
|
|
||||||
|
build:
|
||||||
|
docker build -f build/Dockerfile -t $(IMAGE_NAME) .
|
||||||
|
|
||||||
|
run: build
|
||||||
|
docker run -d --name $(CONTAINER_NAME) \
|
||||||
|
-p $(PORT):8080 \
|
||||||
|
-v $(VOLUME_NAME):/app/data \
|
||||||
|
--restart unless-stopped \
|
||||||
|
$(IMAGE_NAME)
|
||||||
|
@echo "Map will be at http://localhost:$(PORT)/mapa_bytu.html"
|
||||||
|
|
||||||
|
stop:
|
||||||
|
docker stop $(CONTAINER_NAME) && docker rm $(CONTAINER_NAME)
|
||||||
|
|
||||||
|
logs:
|
||||||
|
docker logs -f $(CONTAINER_NAME)
|
||||||
|
|
||||||
|
scrape:
|
||||||
|
docker exec $(CONTAINER_NAME) bash /app/run_all.sh
|
||||||
|
|
||||||
|
restart: stop run
|
||||||
|
|
||||||
|
clean: stop
|
||||||
|
docker rmi $(IMAGE_NAME)
|
||||||
1
build/crontab
Normal file
1
build/crontab
Normal file
@@ -0,0 +1 @@
|
|||||||
|
0 6,18 * * * cd /app && bash /app/run_all.sh >> /proc/1/fd/1 2>> /proc/1/fd/2
|
||||||
22
build/entrypoint.sh
Normal file
22
build/entrypoint.sh
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
DATA_DIR="/app/data"
|
||||||
|
|
||||||
|
# Create symlinks so scripts (which write to /app/) persist data to the volume
|
||||||
|
for f in byty_sreality.json byty_realingo.json byty_bezrealitky.json \
|
||||||
|
byty_idnes.json byty_psn.json byty_cityhome.json byty_merged.json \
|
||||||
|
mapa_bytu.html ratings.json; do
|
||||||
|
# Remove real file if it exists (e.g. baked into image)
|
||||||
|
[ -f "/app/$f" ] && [ ! -L "/app/$f" ] && rm -f "/app/$f"
|
||||||
|
ln -sf "$DATA_DIR/$f" "/app/$f"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "[entrypoint] Starting crond..."
|
||||||
|
crond -b -l 2
|
||||||
|
|
||||||
|
echo "[entrypoint] Starting initial scrape in background..."
|
||||||
|
bash /app/run_all.sh &
|
||||||
|
|
||||||
|
echo "[entrypoint] Starting combined server on port 8080..."
|
||||||
|
exec DATA_DIR="$DATA_DIR" python3 /app/server.py
|
||||||
40
byty_bezrealitky.json
Normal file
40
byty_bezrealitky.json
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"hash_id": 981278,
|
||||||
|
"name": "Prodej bytu 3+kk 70 m²",
|
||||||
|
"price": 11890000,
|
||||||
|
"price_formatted": "11 890 000 Kč",
|
||||||
|
"locality": "Argentinská, Praha - Holešovice",
|
||||||
|
"lat": 50.1026043,
|
||||||
|
"lon": 14.4435365,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 70,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/981278-nabidka-prodej-bytu-argentinska-praha",
|
||||||
|
"source": "bezrealitky",
|
||||||
|
"image": "",
|
||||||
|
"first_seen": "2026-02-15",
|
||||||
|
"last_updated": "2026-02-15"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 991217,
|
||||||
|
"name": "Prodej bytu 3+kk 71 m²",
|
||||||
|
"price": 11490000,
|
||||||
|
"price_formatted": "11 490 000 Kč",
|
||||||
|
"locality": "Kolbenova, Praha - Vysočany",
|
||||||
|
"lat": 50.1113213,
|
||||||
|
"lon": 14.5106858,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 71,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.bezrealitky.cz/nemovitosti-byty-domy/991217-nabidka-prodej-bytu-kolbenova-praha",
|
||||||
|
"source": "bezrealitky",
|
||||||
|
"image": "",
|
||||||
|
"last_updated": "2026-02-15",
|
||||||
|
"first_seen": "2026-02-15"
|
||||||
|
}
|
||||||
|
]
|
||||||
38
byty_cityhome.json
Normal file
38
byty_cityhome.json
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"hash_id": "cityhome_na-vaclavce-34_Byt A2.3",
|
||||||
|
"name": "Prodej bytu 3+1, 99 m² — Na Václavce 34",
|
||||||
|
"price": 13490000,
|
||||||
|
"price_formatted": "13 490 000 Kč",
|
||||||
|
"locality": "Na Václavce 34, Praha 5",
|
||||||
|
"lat": 50.0652858,
|
||||||
|
"lon": 14.3931318,
|
||||||
|
"disposition": "3+1",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 99.1,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.city-home.cz/projekty/na-vaclavce-34/nabidka-nemovitosti/byt-a23",
|
||||||
|
"source": "cityhome",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "cityhome_na-vaclavce-34_Byt A3.2",
|
||||||
|
"name": "Prodej bytu 3+1, 95 m² — Na Václavce 34",
|
||||||
|
"price": 13490000,
|
||||||
|
"price_formatted": "13 490 000 Kč",
|
||||||
|
"locality": "Na Václavce 34, Praha 5",
|
||||||
|
"lat": 50.0652858,
|
||||||
|
"lon": 14.3931318,
|
||||||
|
"disposition": "3+1",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 95.6,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.city-home.cz/projekty/na-vaclavce-34/nabidka-nemovitosti/byt-a32",
|
||||||
|
"source": "cityhome",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
}
|
||||||
|
]
|
||||||
290
byty_idnes.json
Normal file
290
byty_idnes.json
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"hash_id": "699ed0af74468ff4c2079aa1",
|
||||||
|
"name": "Prodej bytu 3+1 86 m²",
|
||||||
|
"price": 4600000,
|
||||||
|
"price_formatted": "4 600 000 Kč",
|
||||||
|
"locality": "Hynka Puce, Praha 5 - Stodůlky",
|
||||||
|
"lat": 50.049168412058556,
|
||||||
|
"lon": 14.302095927878957,
|
||||||
|
"disposition": "3+1",
|
||||||
|
"floor": 8,
|
||||||
|
"area": 86,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "družstevní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-13-hynka-puce/699ed0af74468ff4c2079aa1/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "699ecf47513757ba150e0c74",
|
||||||
|
"name": "Prodej bytu 3+kk 83 m²",
|
||||||
|
"price": 11390000,
|
||||||
|
"price_formatted": "11 390 000 Kč",
|
||||||
|
"locality": "Kytlická, Praha 9 - Prosek",
|
||||||
|
"lat": 50.1251431182,
|
||||||
|
"lon": 14.5077027612,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 8,
|
||||||
|
"area": 83,
|
||||||
|
"building_type": "2011",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-9-kytlicka/699ecf47513757ba150e0c74/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "699c09d25d571b3c7b026d3e",
|
||||||
|
"name": "Prodej bytu 3+kk 93 m²",
|
||||||
|
"price": 11890000,
|
||||||
|
"price_formatted": "11 890 000 Kč",
|
||||||
|
"locality": "Kříženeckého náměstí, Praha 5 - Hlubočepy",
|
||||||
|
"lat": 50.03137852,
|
||||||
|
"lon": 14.39175816,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 93,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-5-krizeneckeho-namesti/699c09d25d571b3c7b026d3e/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "693690e98418631b48025208",
|
||||||
|
"name": "Prodej bytu 3+1 87 m²",
|
||||||
|
"price": 11323000,
|
||||||
|
"price_formatted": "11 323 000 Kč",
|
||||||
|
"locality": "Libušská, Praha 4 - Libuš",
|
||||||
|
"lat": 50.009743674736,
|
||||||
|
"lon": 14.460835345662,
|
||||||
|
"disposition": "3+1",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 87,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "družstevní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-12-libusska/693690e98418631b48025208/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "699487a84abe8029bd065570",
|
||||||
|
"name": "Prodej bytu 3+1 96 m²",
|
||||||
|
"price": 13490000,
|
||||||
|
"price_formatted": "13 490 000 Kč",
|
||||||
|
"locality": "Na Václavce, Praha 5 - Smíchov",
|
||||||
|
"lat": 50.0652882346,
|
||||||
|
"lon": 14.3931192571,
|
||||||
|
"disposition": "3+1",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 96,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-5-na-vaclavce/699487a84abe8029bd065570/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "697c7e54d08e16f19902d777",
|
||||||
|
"name": "Prodej bytu 3+kk 76 m²",
|
||||||
|
"price": 11590040,
|
||||||
|
"price_formatted": "11 590 040 Kč",
|
||||||
|
"locality": "Žilinská, Praha 4 - Záběhlice",
|
||||||
|
"lat": 50.04710645755815,
|
||||||
|
"lon": 14.473057214055794,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 76,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-4-zilinska/697c7e54d08e16f19902d777/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "68f5f1e873fec2e50c0cc20e",
|
||||||
|
"name": "Prodej bytu 3+kk 85 m²",
|
||||||
|
"price": 13499900,
|
||||||
|
"price_formatted": "13 499 900 Kč",
|
||||||
|
"locality": "Hořejší nábřeží, Praha 5 - Smíchov",
|
||||||
|
"lat": 50.0724036111,
|
||||||
|
"lon": 14.4103030556,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 85,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-5-horejsi-nabrezi/68f5f1e873fec2e50c0cc20e/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "6941cf632ff10124be08ce19",
|
||||||
|
"name": "Prodej bytu 4+kk 94 m²",
|
||||||
|
"price": 13249900,
|
||||||
|
"price_formatted": "13 249 900 Kč",
|
||||||
|
"locality": "V dolině, Praha 10 - Michle",
|
||||||
|
"lat": 50.0579944444,
|
||||||
|
"lon": 14.4682905556,
|
||||||
|
"disposition": "4+kk",
|
||||||
|
"floor": 14,
|
||||||
|
"area": 94,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-10-v-doline/6941cf632ff10124be08ce19/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "690c2cba1c264f9f43027912",
|
||||||
|
"name": "Prodej bytu 3+kk 74 m²",
|
||||||
|
"price": 10631123,
|
||||||
|
"price_formatted": "10 631 123 Kč",
|
||||||
|
"locality": "Voskovcova, Praha 5 - Hlubočepy",
|
||||||
|
"lat": 50.0290438889,
|
||||||
|
"lon": 14.3641566667,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 6,
|
||||||
|
"area": 74,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-5-voskovcova/690c2cba1c264f9f43027912/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "68404b3d8178bbed020f1742",
|
||||||
|
"name": "Prodej bytu 3+kk 71 m²",
|
||||||
|
"price": 10990000,
|
||||||
|
"price_formatted": "10 990 000 Kč",
|
||||||
|
"locality": "Praha 10 - Uhříněves",
|
||||||
|
"lat": 50.026899,
|
||||||
|
"lon": 14.613713,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 71,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-22/68404b3d8178bbed020f1742/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "6932bf9dc9442dc194054416",
|
||||||
|
"name": "Prodej bytu 3+kk 71 m²",
|
||||||
|
"price": 8100000,
|
||||||
|
"price_formatted": "8 100 000 Kč",
|
||||||
|
"locality": "Štětínská, Praha 8 - Bohnice, okres Praha",
|
||||||
|
"lat": 50.1297302,
|
||||||
|
"lon": 14.4286652,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 71,
|
||||||
|
"building_type": "1974",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-8-stetinska/6932bf9dc9442dc194054416/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "699eacc0a56ec9b4a80069b6",
|
||||||
|
"name": "Prodej bytu 3+kk 81 m²",
|
||||||
|
"price": 13000000,
|
||||||
|
"price_formatted": "13 000 000 Kč",
|
||||||
|
"locality": "Hlučkova, Praha 9 - Letňany",
|
||||||
|
"lat": 50.141739,
|
||||||
|
"lon": 14.522086,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 17,
|
||||||
|
"area": 81,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-18-hluckova/699eacc0a56ec9b4a80069b6/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "682b20ae5fcffc3dc8072856",
|
||||||
|
"name": "Prodej bytu 3+kk 78 m²",
|
||||||
|
"price": 12463000,
|
||||||
|
"price_formatted": "12 463 000 Kč",
|
||||||
|
"locality": "Kubelíkova, Praha 3 - Žižkov",
|
||||||
|
"lat": 50.0823325029164,
|
||||||
|
"lon": 14.451052236466976,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 78,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-3-kubelikova/682b20ae5fcffc3dc8072856/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "68f0b7b4263df471cb050df9",
|
||||||
|
"name": "Prodej bytu 4+kk 75 m²",
|
||||||
|
"price": 10363000,
|
||||||
|
"price_formatted": "10 363 000 Kč",
|
||||||
|
"locality": "Karla Guta, Praha 10 - Uhříněves",
|
||||||
|
"lat": 50.030382258,
|
||||||
|
"lon": 14.5931238354,
|
||||||
|
"disposition": "4+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 75,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-22-karla-guta/68f0b7b4263df471cb050df9/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "695cdf5113e97880200d9e62",
|
||||||
|
"name": "Prodej bytu 3+kk 82 m²",
|
||||||
|
"price": 11133000,
|
||||||
|
"price_formatted": "11 133 000 Kč",
|
||||||
|
"locality": "K Vinoři, Praha 9 - Kbely",
|
||||||
|
"lat": 50.132835725,
|
||||||
|
"lon": 14.5613326001,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 82,
|
||||||
|
"building_type": "2026",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-19-k-vinori/695cdf5113e97880200d9e62/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": "69930de7098209b20e066a6c",
|
||||||
|
"name": "Prodej bytu 3+kk 91 m²",
|
||||||
|
"price": 11000000,
|
||||||
|
"price_formatted": "11 000 000 Kč",
|
||||||
|
"locality": "Formanská, Praha 4 - Újezd u Průhonic, okres Praha",
|
||||||
|
"lat": 50.0114383,
|
||||||
|
"lon": 14.5469,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 91,
|
||||||
|
"building_type": "2017",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://reality.idnes.cz/detail/prodej/byt/praha-11-formanska/69930de7098209b20e066a6c/",
|
||||||
|
"source": "idnes",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
}
|
||||||
|
]
|
||||||
1014
byty_merged.json
Normal file
1014
byty_merged.json
Normal file
File diff suppressed because it is too large
Load Diff
20
byty_psn.json
Normal file
20
byty_psn.json
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"hash_id": "8941",
|
||||||
|
"name": "Prodej bytu 3+kk, 102 m² — JITRO",
|
||||||
|
"price": 13994000,
|
||||||
|
"price_formatted": "13 994 000 Kč",
|
||||||
|
"locality": "Litevská 1174/8, Praha 10",
|
||||||
|
"lat": 50.0729,
|
||||||
|
"lon": 14.4767,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 102.7,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "osobní",
|
||||||
|
"url": "https://psn.cz/prodej/ubytovaci-jednotka-3-kk-litevska-praha-10-vrsovice-lit4219",
|
||||||
|
"source": "psn",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
}
|
||||||
|
]
|
||||||
164
byty_realingo.json
Normal file
164
byty_realingo.json
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"hash_id": 24515963,
|
||||||
|
"name": "Prodej bytu 3+kk 83 m²",
|
||||||
|
"price": 11390000,
|
||||||
|
"price_formatted": "11 390 000 Kč",
|
||||||
|
"locality": "Kytlická, Praha",
|
||||||
|
"lat": 50.1251431182,
|
||||||
|
"lon": 14.5077027612,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 83,
|
||||||
|
"building_type": "WIREFRAME",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-3+kk-kytlicka-praha/24515963",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24515884,
|
||||||
|
"name": "Prodej bytu 3+kk 81 m²",
|
||||||
|
"price": 13000000,
|
||||||
|
"price_formatted": "13 000 000 Kč",
|
||||||
|
"locality": "Hlučkova 869, Praha",
|
||||||
|
"lat": 50.142303781599,
|
||||||
|
"lon": 14.522362316941,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 81,
|
||||||
|
"building_type": "OTHER",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-3+kk-hluckova-869-praha/24515884",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24515669,
|
||||||
|
"name": "Prodej bytu Atypický None m²",
|
||||||
|
"price": 8487297,
|
||||||
|
"price_formatted": "8 487 297 Kč",
|
||||||
|
"locality": "Praha, 190 00",
|
||||||
|
"lat": 50.106598,
|
||||||
|
"lon": 14.506245,
|
||||||
|
"disposition": "Atypický",
|
||||||
|
"floor": null,
|
||||||
|
"area": null,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-ostatni-byty-praha-190-00/24515669",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24515653,
|
||||||
|
"name": "Prodej bytu Atypický None m²",
|
||||||
|
"price": 8890000,
|
||||||
|
"price_formatted": "8 890 000 Kč",
|
||||||
|
"locality": "Praha, 130 00",
|
||||||
|
"lat": 50.087602,
|
||||||
|
"lon": 14.470882,
|
||||||
|
"disposition": "Atypický",
|
||||||
|
"floor": null,
|
||||||
|
"area": null,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-ostatni-byty-praha-130-00/24515653",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24515514,
|
||||||
|
"name": "Prodej bytu Atypický None m²",
|
||||||
|
"price": 7490000,
|
||||||
|
"price_formatted": "7 490 000 Kč",
|
||||||
|
"locality": "Praha, 141 00",
|
||||||
|
"lat": 50.045786,
|
||||||
|
"lon": 14.470711,
|
||||||
|
"disposition": "Atypický",
|
||||||
|
"floor": null,
|
||||||
|
"area": null,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-ostatni-byty-praha-141-00/24515514",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24514922,
|
||||||
|
"name": "Prodej bytu Atypický None m²",
|
||||||
|
"price": 12132000,
|
||||||
|
"price_formatted": "12 132 000 Kč",
|
||||||
|
"locality": "Praha, 120 00",
|
||||||
|
"lat": 50.076449,
|
||||||
|
"lon": 14.435263,
|
||||||
|
"disposition": "Atypický",
|
||||||
|
"floor": null,
|
||||||
|
"area": null,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-2+kk-slezska-praha/24514922",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24514813,
|
||||||
|
"name": "Prodej bytu Atypický None m²",
|
||||||
|
"price": 8490000,
|
||||||
|
"price_formatted": "8 490 000 Kč",
|
||||||
|
"locality": "Praha, 100 00",
|
||||||
|
"lat": 50.074273,
|
||||||
|
"lon": 14.493284,
|
||||||
|
"disposition": "Atypický",
|
||||||
|
"floor": null,
|
||||||
|
"area": null,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-ostatni-byty-praha-100-00/24514813",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24514769,
|
||||||
|
"name": "Prodej bytu Atypický None m²",
|
||||||
|
"price": 6980000,
|
||||||
|
"price_formatted": "6 980 000 Kč",
|
||||||
|
"locality": "Praha, 154 00",
|
||||||
|
"lat": 50.010056,
|
||||||
|
"lon": 14.353809,
|
||||||
|
"disposition": "Atypický",
|
||||||
|
"floor": null,
|
||||||
|
"area": null,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-ostatni-byty-praha-154-00/24514769",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 24514708,
|
||||||
|
"name": "Prodej bytu Atypický None m²",
|
||||||
|
"price": 5362000,
|
||||||
|
"price_formatted": "5 362 000 Kč",
|
||||||
|
"locality": "Praha, 155 00",
|
||||||
|
"lat": 50.030571,
|
||||||
|
"lon": 14.308491,
|
||||||
|
"disposition": "Atypický",
|
||||||
|
"floor": null,
|
||||||
|
"area": null,
|
||||||
|
"building_type": "neuvedeno",
|
||||||
|
"ownership": "neuvedeno",
|
||||||
|
"url": "https://www.realingo.cz/prodej/byt-ostatni-byty-praha-155-00/24514708",
|
||||||
|
"source": "realingo",
|
||||||
|
"image": "",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
}
|
||||||
|
]
|
||||||
548
byty_sreality.json
Normal file
548
byty_sreality.json
Normal file
@@ -0,0 +1,548 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"hash_id": 938877772,
|
||||||
|
"name": "Prodej bytu 3+kk 99 m²",
|
||||||
|
"price": 12990000,
|
||||||
|
"price_formatted": "12 990 000 Kč",
|
||||||
|
"locality": "Čeljabinská, Praha 10 - Vršovice",
|
||||||
|
"lat": 50.069641,
|
||||||
|
"lon": 14.470198,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 99,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-vrsovice-celjabinska/938877772",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p7_D/kBfrbpoeNBLdvLCneFodIxL/21cc.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2036855628,
|
||||||
|
"name": "Prodej bytu 3+kk 83 m²",
|
||||||
|
"price": 10490000,
|
||||||
|
"price_formatted": "10 490 000 Kč",
|
||||||
|
"locality": "Na Výrovně, Praha 5 - Stodůlky",
|
||||||
|
"lat": 50.039608,
|
||||||
|
"lon": 14.316702,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 83,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-stodulky-na-vyrovne/2036855628",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p7_C/nPXMbbUsvqW7e6cQFkEl5P/7399.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2148991820,
|
||||||
|
"name": "Prodej bytu 3+kk 72 m²",
|
||||||
|
"price": 10990000,
|
||||||
|
"price_formatted": "10 990 000 Kč",
|
||||||
|
"locality": "Pod Marjánkou, Praha 6 - Břevnov",
|
||||||
|
"lat": 50.084381,
|
||||||
|
"lon": 14.372257,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 72,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-brevnov-pod-marjankou/2148991820",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p7_C/kOzkBkwYBTCNNSBPI1FiGB0F/c3a0.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3226313292,
|
||||||
|
"name": "Prodej bytu 3+kk 83 m²",
|
||||||
|
"price": 13500000,
|
||||||
|
"price_formatted": "13 500 000 Kč",
|
||||||
|
"locality": "Na Neklance, Praha 5 - Smíchov",
|
||||||
|
"lat": 50.060715,
|
||||||
|
"lon": 14.401836,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 83,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-smichov-na-neklance/3226313292",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_A/kBfrbpoeND2I1YDy2Fq7ErU/6389.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"last_updated": "2026-02-15",
|
||||||
|
"first_seen": "2026-02-15"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 475530060,
|
||||||
|
"name": "Prodej bytu 3+kk 83 m²",
|
||||||
|
"price": 12250000,
|
||||||
|
"price_formatted": "12 250 000 Kč",
|
||||||
|
"locality": "Radouňova, Praha 5 - Stodůlky",
|
||||||
|
"lat": 50.039043,
|
||||||
|
"lon": 14.314881,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 83,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-stodulky-radounova/475530060",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p7_A/nDJ4VEZEqxQDMR0LFbAhGV/d4cf.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2303799884,
|
||||||
|
"name": "Prodej bytu 3+kk 88 m² (Jednopodlažní)",
|
||||||
|
"price": 12860000,
|
||||||
|
"price_formatted": "12 860 000 Kč",
|
||||||
|
"locality": "Spojovací, Praha 9 - Vysočany",
|
||||||
|
"lat": 50.100174,
|
||||||
|
"lon": 14.492079,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 88,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-vysocany-spojovaci/2303799884",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_oV_A/kQOIvbF2D1DN63hulCAKv40/3667.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3493290828,
|
||||||
|
"name": "Prodej bytu 3+kk 83 m²",
|
||||||
|
"price": 11390000,
|
||||||
|
"price_formatted": "11 390 000 Kč",
|
||||||
|
"locality": "Kytlická, Praha 9 - Prosek",
|
||||||
|
"lat": 50.125145,
|
||||||
|
"lon": 14.507703,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 83,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-prosek-kytlicka/3493290828",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_C/nPVpfd5QLLDqk1BGdrF3rQMW/0fe5.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 151528268,
|
||||||
|
"name": "Prodej bytu 3+kk 86 m²",
|
||||||
|
"price": 11390000,
|
||||||
|
"price_formatted": "11 390 000 Kč",
|
||||||
|
"locality": "Spojovací, Praha",
|
||||||
|
"lat": 50.101852,
|
||||||
|
"lon": 14.486118,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 86,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha--spojovaci/151528268",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_of_C/kPxr1WDRoIBXSQV6LE550j7/1607.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 1837527884,
|
||||||
|
"name": "Prodej bytu 3+kk 73 m² (Jednopodlažní)",
|
||||||
|
"price": 12790000,
|
||||||
|
"price_formatted": "12 790 000 Kč",
|
||||||
|
"locality": "Vrázova, Praha - Smíchov",
|
||||||
|
"lat": 50.071224,
|
||||||
|
"lon": 14.407872,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 73,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-smichov-vrazova/1837527884",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_C/kY1K2LlXQDnuVLD65F2mjiY/96d4.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3330433868,
|
||||||
|
"name": "Prodej bytu 3+kk 93 m²",
|
||||||
|
"price": 11890000,
|
||||||
|
"price_formatted": "11 890 000 Kč",
|
||||||
|
"locality": "Kříženeckého náměstí, Praha 5 - Hlubočepy",
|
||||||
|
"lat": 50.03138,
|
||||||
|
"lon": 14.391757,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 93,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-hlubocepy-krizeneckeho-namesti/3330433868",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_C/nPVpfd5QLLChvUCFgIF2b8p9/bffb.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2053579340,
|
||||||
|
"name": "Prodej bytu 3+kk 76 m²",
|
||||||
|
"price": 11858981,
|
||||||
|
"price_formatted": "11 858 981 Kč",
|
||||||
|
"locality": "Za Novákovou zahradou, Praha - Satalice",
|
||||||
|
"lat": 50.122192,
|
||||||
|
"lon": 14.57646,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 76,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-satalice-za-novakovou-zahradou/2053579340",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_oe_B/nO1Ur3YPjB17k9qAElHGe3/e889.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"last_updated": "2026-02-15",
|
||||||
|
"first_seen": "2026-02-15"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3651539788,
|
||||||
|
"name": "Prodej bytu 3+kk 69 m² (Jednopodlažní)",
|
||||||
|
"price": 13500000,
|
||||||
|
"price_formatted": "13 500 000 Kč",
|
||||||
|
"locality": "Zvěřinova, Praha 3 - Strašnice",
|
||||||
|
"lat": 50.084606,
|
||||||
|
"lon": 14.482681,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 12,
|
||||||
|
"area": 69,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-strasnice-zverinova/3651539788",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_og_D/nDJ4VEZEqxmIDK5SFVQ67V/47d3.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 4005061452,
|
||||||
|
"name": "Prodej bytu 3+kk 101 m² (Jednopodlažní)",
|
||||||
|
"price": 12875000,
|
||||||
|
"price_formatted": "12 875 000 Kč",
|
||||||
|
"locality": "U Hostavického potoka, Praha 9 - Hostavice",
|
||||||
|
"lat": 50.086601,
|
||||||
|
"lon": 14.5636,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 101,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-hostavice-u-hostavickeho-potoka/4005061452",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_B/kY1K2LlXQBdLY8B5hFyVzb6/c266.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 589460300,
|
||||||
|
"name": "Prodej bytu 3+kk 75 m²",
|
||||||
|
"price": 13126000,
|
||||||
|
"price_formatted": "13 126 000 Kč",
|
||||||
|
"locality": "Ke Slivenci, Praha - Lochkov",
|
||||||
|
"lat": 50.004192,
|
||||||
|
"lon": 14.355805,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 75,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-lochkov-ke-slivenci/589460300",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_A/kBfrbpoeNCgOJyFn9Fq0T4y/bed6.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2926625612,
|
||||||
|
"name": "Prodej bytu 3+kk 85 m²",
|
||||||
|
"price": 13499900,
|
||||||
|
"price_formatted": "13 499 900 Kč",
|
||||||
|
"locality": "Hořejší nábřeží, Praha 5 - Smíchov",
|
||||||
|
"lat": 50.072403,
|
||||||
|
"lon": 14.410302,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 85,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-smichov-horejsi-nabrezi/2926625612",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_C/nPVpfd5QLLBPLz5GC0F3Hshx/c1f4.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3672994636,
|
||||||
|
"name": "Prodej bytu 3+kk 81 m²",
|
||||||
|
"price": 12390000,
|
||||||
|
"price_formatted": "12 390 000 Kč",
|
||||||
|
"locality": "Stochovská, Praha 6 - Ruzyně",
|
||||||
|
"lat": 50.082985,
|
||||||
|
"lon": 14.311815,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 81,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-ruzyne-stochovska/3672994636",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_C/nPVpfd5QLLD53UiFNrF2fBro/c20c.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 4070581580,
|
||||||
|
"name": "Prodej bytu 3+kk 77 m²",
|
||||||
|
"price": 12207113,
|
||||||
|
"price_formatted": "12 207 113 Kč",
|
||||||
|
"locality": "Marie Podvalové, Praha - Čakovice",
|
||||||
|
"lat": 50.157696,
|
||||||
|
"lon": 14.519159,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 77,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-cakovice-marie-podvalove/4070581580",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_A/kBfrbpoeNBFLmuFzPFuWw0w/0867.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2772919116,
|
||||||
|
"name": "Prodej bytu 3+kk 81 m²",
|
||||||
|
"price": 13000000,
|
||||||
|
"price_formatted": "13 000 000 Kč",
|
||||||
|
"locality": "Hlučkova, Praha 9 - Letňany",
|
||||||
|
"lat": 50.141739,
|
||||||
|
"lon": 14.522086,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 81,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-letnany-hluckova/2772919116",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_C/nPVpfd5QLLDqzReGOMF3mju3/0593.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2242032460,
|
||||||
|
"name": "Prodej bytu 3+kk 98 m²",
|
||||||
|
"price": 12762764,
|
||||||
|
"price_formatted": "12 762 764 Kč",
|
||||||
|
"locality": "Lodžská, Praha 8 - Bohnice",
|
||||||
|
"lat": 50.13076,
|
||||||
|
"lon": 14.423249,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 98,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-bohnice-lodzska/2242032460",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_B/kY1K2LlXQBsPCGB2DFyc98H/0138.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3617202764,
|
||||||
|
"name": "Prodej bytu 3+kk 79 m²",
|
||||||
|
"price": 12959520,
|
||||||
|
"price_formatted": "12 959 520 Kč",
|
||||||
|
"locality": "Komárkova, Praha 4 - Chodov",
|
||||||
|
"lat": 50.036095,
|
||||||
|
"lon": 14.48035,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 79,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-chodov-komarkova/3617202764",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_B/kY1K2LlXQCEs3VCXcFzFm2Z/52f3.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2860663372,
|
||||||
|
"name": "Prodej bytu 3+kk 78 m² (Jednopodlažní)",
|
||||||
|
"price": 12463000,
|
||||||
|
"price_formatted": "12 463 000 Kč",
|
||||||
|
"locality": "Kubelíkova, Praha",
|
||||||
|
"lat": 50.082317,
|
||||||
|
"lon": 14.450463,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 78,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha--kubelikova/2860663372",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_oZ_C/nsLxLojIBfZuBIjJDOugLv/2953.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 158065228,
|
||||||
|
"name": "Prodej bytu 3+kk 73 m²",
|
||||||
|
"price": 10947779,
|
||||||
|
"price_formatted": "10 947 779 Kč",
|
||||||
|
"locality": "Marie Podvalové, Praha - Čakovice",
|
||||||
|
"lat": 50.157696,
|
||||||
|
"lon": 14.519159,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 73,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-cakovice-marie-podvalove/158065228",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_A/nPXMbbUsvqawIkHTbFrh4zH/4f1d.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 4227625804,
|
||||||
|
"name": "Prodej bytu 3+kk 81 m²",
|
||||||
|
"price": 10990000,
|
||||||
|
"price_formatted": "10 990 000 Kč",
|
||||||
|
"locality": "Ukrajinská, Praha 10 - Vršovice",
|
||||||
|
"lat": 50.065208,
|
||||||
|
"lon": 14.450711,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 81,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-vrsovice-ukrajinska/4227625804",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_A/nPXMbbUsvqDzGaHEfWFp5WXH/fd47.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 1313456972,
|
||||||
|
"name": "Prodej bytu 3+kk 87 m²",
|
||||||
|
"price": 3641309,
|
||||||
|
"price_formatted": "3 641 309 Kč",
|
||||||
|
"locality": "Praha 9",
|
||||||
|
"lat": 50.106956,
|
||||||
|
"lon": 14.510207,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 87,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Družstevní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-praha-9-/1313456972",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_B/kY1K2LlXQBsPCGXMFvI8II/9010.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 1671439692,
|
||||||
|
"name": "Prodej bytu 3+kk 77 m²",
|
||||||
|
"price": 12556524,
|
||||||
|
"price_formatted": "12 556 524 Kč",
|
||||||
|
"locality": "Marie Podvalové, Praha - Čakovice",
|
||||||
|
"lat": 50.157696,
|
||||||
|
"lon": 14.519159,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 77,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-cakovice-marie-podvalove/1671439692",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_A/nPXMbbUsvqW7e6HmhFuWvTy/67cb.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 576226124,
|
||||||
|
"name": "Prodej bytu 3+kk 71 m²",
|
||||||
|
"price": 12026912,
|
||||||
|
"price_formatted": "12 026 912 Kč",
|
||||||
|
"locality": "Hábova, Praha 5 - Stodůlky",
|
||||||
|
"lat": 50.04636,
|
||||||
|
"lon": 14.310556,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 71,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-stodulky-habova/576226124",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_of_A/nO5OZtPbfGXuqXa5EzIqYl/4c78.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 473465676,
|
||||||
|
"name": "Prodej bytu 3+kk 73 m²",
|
||||||
|
"price": 12349349,
|
||||||
|
"price_formatted": "12 349 349 Kč",
|
||||||
|
"locality": "Hábova, Praha 5 - Stodůlky",
|
||||||
|
"lat": 50.04636,
|
||||||
|
"lon": 14.310556,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 73,
|
||||||
|
"building_type": "Smíšená",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-stodulky-habova/473465676",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_of_A/nO5OZtPbfGCc8bBbKEzIlyN/5708.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 2185458508,
|
||||||
|
"name": "Prodej bytu 3+kk 76 m²",
|
||||||
|
"price": 11978000,
|
||||||
|
"price_formatted": "11 978 000 Kč",
|
||||||
|
"locality": "Matoušova, Praha - Smíchov",
|
||||||
|
"lat": 50.074284,
|
||||||
|
"lon": 14.405826,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 76,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-smichov-matousova/2185458508",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_C/kY1K2LlXQJGqrEMbF29iKr/c977.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3988325196,
|
||||||
|
"name": "Prodej bytu 3+kk 83 m² (Jednopodlažní)",
|
||||||
|
"price": 13190000,
|
||||||
|
"price_formatted": "13 190 000 Kč",
|
||||||
|
"locality": "Práčská, Praha",
|
||||||
|
"lat": 50.053101,
|
||||||
|
"lon": 14.507191,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 2,
|
||||||
|
"area": 83,
|
||||||
|
"building_type": "Kamenná",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha--pracska/3988325196",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p7_A/kOzkBkwYBTDPt7SL0bFanjav/5b75.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3019572044,
|
||||||
|
"name": "Prodej bytu 3+kk 76 m²",
|
||||||
|
"price": 10790000,
|
||||||
|
"price_formatted": "10 790 000 Kč",
|
||||||
|
"locality": "Plzákova, Praha - Kbely",
|
||||||
|
"lat": 50.13237,
|
||||||
|
"lon": 14.53639,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 3,
|
||||||
|
"area": 76,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-kbely-plzakova/3019572044",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p7_A/nDJ4VEZEqCaOxZQ8PFZch2R/b90f.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 3704697676,
|
||||||
|
"name": "Prodej bytu 3+kk 72 m²",
|
||||||
|
"price": 8000000,
|
||||||
|
"price_formatted": "8 000 000 Kč",
|
||||||
|
"locality": "Litevská, Praha 10 - Vršovice",
|
||||||
|
"lat": 50.072536,
|
||||||
|
"lon": 14.476557,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 5,
|
||||||
|
"area": 72,
|
||||||
|
"building_type": "Cihlová",
|
||||||
|
"ownership": "Družstevní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-vrsovice-litevska/3704697676",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_of_A/nOztZkD4ZlC2Y2EU6E0MiZv/15fc.png?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hash_id": 1137623884,
|
||||||
|
"name": "Prodej bytu 3+kk 71 m²",
|
||||||
|
"price": 12318349,
|
||||||
|
"price_formatted": "12 318 349 Kč",
|
||||||
|
"locality": "Praha 9",
|
||||||
|
"lat": 50.106956,
|
||||||
|
"lon": 14.510207,
|
||||||
|
"disposition": "3+kk",
|
||||||
|
"floor": 4,
|
||||||
|
"area": 71,
|
||||||
|
"building_type": "Skeletová",
|
||||||
|
"ownership": "Osobní",
|
||||||
|
"url": "https://www.sreality.cz/detail/prodej/byt/3+kk/praha-praha-9-/1137623884",
|
||||||
|
"image": "https://d18-a.sdn.cz/d_18/c_img_p8_B/nPVpfd5QLLksKHbwFvjCd6/56ac.jpeg?fl=res,400,300,3|shr,,20|jpg,90",
|
||||||
|
"scraped_at": "2026-02-25"
|
||||||
|
}
|
||||||
|
]
|
||||||
202
generate_status.py
Normal file
202
generate_status.py
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Generate status.json from scraper JSON outputs and run log."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
HERE = Path(__file__).parent
|
||||||
|
|
||||||
|
SOURCE_FILES = {
|
||||||
|
"Sreality": "byty_sreality.json",
|
||||||
|
"Realingo": "byty_realingo.json",
|
||||||
|
"Bezrealitky": "byty_bezrealitky.json",
|
||||||
|
"iDNES": "byty_idnes.json",
|
||||||
|
"PSN": "byty_psn.json",
|
||||||
|
"CityHome": "byty_cityhome.json",
|
||||||
|
}
|
||||||
|
|
||||||
|
MERGED_FILE = "byty_merged.json"
|
||||||
|
|
||||||
|
|
||||||
|
def count_source(path: Path) -> dict:
|
||||||
|
"""Read a scraper JSON and return accepted count + file mtime."""
|
||||||
|
if not path.exists():
|
||||||
|
return {"accepted": 0, "error": "soubor nenalezen"}
|
||||||
|
try:
|
||||||
|
data = json.loads(path.read_text(encoding="utf-8"))
|
||||||
|
mtime = datetime.fromtimestamp(path.stat().st_mtime).isoformat(timespec="seconds")
|
||||||
|
return {"accepted": len(data), "updated_at": mtime}
|
||||||
|
except Exception as e:
|
||||||
|
return {"accepted": 0, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_log(log_path: str) -> dict[str, dict]:
|
||||||
|
"""Parse scraper run log and extract per-source statistics.
|
||||||
|
|
||||||
|
Scrapers log summary lines like:
|
||||||
|
✓ Vyhovující byty: 12
|
||||||
|
Vyloučeno (prodáno): 5
|
||||||
|
Staženo stránek: 3
|
||||||
|
Staženo inzerátů: 48
|
||||||
|
Celkem bytů v cache: 120
|
||||||
|
and section headers like:
|
||||||
|
[2/6] Realingo
|
||||||
|
"""
|
||||||
|
if not log_path or not os.path.exists(log_path):
|
||||||
|
return {}
|
||||||
|
|
||||||
|
with open(log_path, encoding="utf-8") as f:
|
||||||
|
content = f.read()
|
||||||
|
|
||||||
|
# Split into per-source sections by the [N/6] Step header
|
||||||
|
# Each section header looks like "[2/6] Realingo\n----..."
|
||||||
|
section_pattern = re.compile(r'\[(\d+)/\d+\]\s+(.+)\n-+', re.MULTILINE)
|
||||||
|
sections_found = list(section_pattern.finditer(content))
|
||||||
|
|
||||||
|
if not sections_found:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
stats = {}
|
||||||
|
for i, match in enumerate(sections_found):
|
||||||
|
step_name = match.group(2).strip()
|
||||||
|
start = match.end()
|
||||||
|
end = sections_found[i + 1].start() if i + 1 < len(sections_found) else len(content)
|
||||||
|
section_text = content[start:end]
|
||||||
|
|
||||||
|
# Identify which sources this section covers
|
||||||
|
# "PSN + CityHome" covers both
|
||||||
|
source_names = []
|
||||||
|
for name in SOURCE_FILES:
|
||||||
|
if name.lower() in step_name.lower():
|
||||||
|
source_names.append(name)
|
||||||
|
if not source_names:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Parse numeric summary lines
|
||||||
|
def extract(pattern: str) -> Optional[int]:
|
||||||
|
m = re.search(pattern, section_text)
|
||||||
|
return int(m.group(1)) if m else None
|
||||||
|
|
||||||
|
# Lines present in all/most scrapers
|
||||||
|
accepted = extract(r'Vyhovující byty[:\s]+(\d+)')
|
||||||
|
fetched = extract(r'Staženo inzerátů[:\s]+(\d+)')
|
||||||
|
pages = extract(r'Staženo stránek[:\s]+(\d+)')
|
||||||
|
cached = extract(r'Celkem bytů v cache[:\s]+(\d+)')
|
||||||
|
cache_hits = extract(r'Cache hit[:\s]+(\d+)')
|
||||||
|
|
||||||
|
# Rejection reasons — collect all into a dict
|
||||||
|
excluded = {}
|
||||||
|
for m in re.finditer(r'Vyloučeno\s+\(([^)]+)\)[:\s]+(\d+)', section_text):
|
||||||
|
excluded[m.group(1)] = int(m.group(2))
|
||||||
|
# Also PSN-style "Vyloučeno (prodáno): N"
|
||||||
|
total_excluded = sum(excluded.values()) if excluded else extract(r'Vyloučen\w*[:\s]+(\d+)')
|
||||||
|
|
||||||
|
entry = {}
|
||||||
|
if accepted is not None:
|
||||||
|
entry["accepted"] = accepted
|
||||||
|
if fetched is not None:
|
||||||
|
entry["fetched"] = fetched
|
||||||
|
if pages is not None:
|
||||||
|
entry["pages"] = pages
|
||||||
|
if cached is not None:
|
||||||
|
entry["cached"] = cached
|
||||||
|
if cache_hits is not None:
|
||||||
|
entry["cache_hits"] = cache_hits
|
||||||
|
if excluded:
|
||||||
|
entry["excluded"] = excluded
|
||||||
|
elif total_excluded is not None:
|
||||||
|
entry["excluded_total"] = total_excluded
|
||||||
|
|
||||||
|
for name in source_names:
|
||||||
|
stats[name] = entry
|
||||||
|
|
||||||
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
start_time = None
|
||||||
|
duration_sec = None
|
||||||
|
|
||||||
|
if len(sys.argv) >= 3:
|
||||||
|
start_time = sys.argv[1]
|
||||||
|
try:
|
||||||
|
duration_sec = int(sys.argv[2])
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not start_time:
|
||||||
|
start_time = datetime.now().isoformat(timespec="seconds")
|
||||||
|
|
||||||
|
log_path = sys.argv[3] if len(sys.argv) >= 4 else None
|
||||||
|
log_stats = parse_log(log_path)
|
||||||
|
|
||||||
|
sources = []
|
||||||
|
for name, filename in SOURCE_FILES.items():
|
||||||
|
path = HERE / filename
|
||||||
|
info = count_source(path)
|
||||||
|
info["name"] = name
|
||||||
|
|
||||||
|
# Merge log stats
|
||||||
|
ls = log_stats.get(name, {})
|
||||||
|
for k in ("fetched", "pages", "cached", "cache_hits", "excluded", "excluded_total"):
|
||||||
|
if k in ls:
|
||||||
|
info[k] = ls[k]
|
||||||
|
# Override accepted from log if available (log is authoritative for latest run)
|
||||||
|
if "accepted" in ls:
|
||||||
|
info["accepted"] = ls["accepted"]
|
||||||
|
|
||||||
|
sources.append(info)
|
||||||
|
|
||||||
|
# Total accepted before dedup
|
||||||
|
total_accepted = sum(s.get("accepted", 0) for s in sources)
|
||||||
|
|
||||||
|
# Merged / deduplicated count
|
||||||
|
merged_path = HERE / MERGED_FILE
|
||||||
|
deduplicated = 0
|
||||||
|
if merged_path.exists():
|
||||||
|
try:
|
||||||
|
merged = json.loads(merged_path.read_text(encoding="utf-8"))
|
||||||
|
deduplicated = len(merged)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
duplicates_removed = total_accepted - deduplicated if deduplicated else 0
|
||||||
|
|
||||||
|
status = {
|
||||||
|
"status": "done",
|
||||||
|
"timestamp": start_time,
|
||||||
|
"duration_sec": duration_sec,
|
||||||
|
"total_accepted": total_accepted,
|
||||||
|
"deduplicated": deduplicated,
|
||||||
|
"duplicates_removed": duplicates_removed,
|
||||||
|
"sources": sources,
|
||||||
|
}
|
||||||
|
|
||||||
|
out = HERE / "status.json"
|
||||||
|
out.write_text(json.dumps(status, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||||
|
print(f"Status uložen: {out}")
|
||||||
|
print(f" Celkem bytů (před dedup): {total_accepted}")
|
||||||
|
print(f" Po deduplikaci: {deduplicated}")
|
||||||
|
if duplicates_removed:
|
||||||
|
print(f" Odstraněno duplikátů: {duplicates_removed}")
|
||||||
|
for s in sources:
|
||||||
|
acc = s.get("accepted", 0)
|
||||||
|
err = s.get("error", "")
|
||||||
|
exc = s.get("excluded", {})
|
||||||
|
exc_total = sum(exc.values()) if exc else s.get("excluded_total", 0)
|
||||||
|
parts = [f"{s['name']:12s}: {acc} bytů"]
|
||||||
|
if exc_total:
|
||||||
|
parts.append(f"({exc_total} vyloučeno)")
|
||||||
|
if err:
|
||||||
|
parts.append(f"[CHYBA: {err}]")
|
||||||
|
print(" " + " ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
985
mapa_bytu.html
985
mapa_bytu.html
File diff suppressed because it is too large
Load Diff
19
project/todo.md
Normal file
19
project/todo.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
## Features to add
|
||||||
|
|
||||||
|
- testing: run very limited scrape: 1 page, 10 properties so that we can do some validations on it
|
||||||
|
- makefile: add target to run locally (with the webserver)
|
||||||
|
- feature: store date of the last scrape somewhere, so that we know how fresh data are
|
||||||
|
- feature: ?? mark property with scrape when it first appeared - we might be able to look at recent diffs only
|
||||||
|
|
||||||
|
|
||||||
|
## code organization
|
||||||
|
- prepare reasonable code structure from the bunch of "random" files
|
||||||
|
|
||||||
|
|
||||||
|
## documentation
|
||||||
|
- precisely document original intent of the app (Maru has to provide this)
|
||||||
|
|
||||||
|
##
|
||||||
|
- prepare production run
|
||||||
|
- probably in home kubernetes
|
||||||
|
- maru-hleda-byt.lab.home.hrajfrisbee.cz
|
||||||
68
run_all.sh
Normal file → Executable file
68
run_all.sh
Normal file → Executable file
@@ -2,6 +2,8 @@
|
|||||||
# ============================================================
|
# ============================================================
|
||||||
# Spustí všechny scrapery, sloučí data a otevře mapu.
|
# Spustí všechny scrapery, sloučí data a otevře mapu.
|
||||||
# Použití: ./run_all.sh
|
# Použití: ./run_all.sh
|
||||||
|
# Nebo s limity: ./run_all.sh --max-pages 1 --max-properties 10
|
||||||
|
# Nebo s logováním: ./run_all.sh --log-level DEBUG
|
||||||
# ============================================================
|
# ============================================================
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
cd "$(dirname "$0")"
|
cd "$(dirname "$0")"
|
||||||
@@ -14,6 +16,50 @@ NC='\033[0m'
|
|||||||
TOTAL=6
|
TOTAL=6
|
||||||
CURRENT=0
|
CURRENT=0
|
||||||
FAILED=0
|
FAILED=0
|
||||||
|
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
|
||||||
|
START_EPOCH=$(date +%s)
|
||||||
|
LOG_FILE="$(pwd)/scrape_run.log"
|
||||||
|
|
||||||
|
# Mark status as running
|
||||||
|
echo '{"status":"running"}' > status.json
|
||||||
|
|
||||||
|
show_help() {
|
||||||
|
echo "Usage: ./run_all.sh [OPTIONS]"
|
||||||
|
echo ""
|
||||||
|
echo "Spustí všechny scrapery, sloučí data a otevře mapu."
|
||||||
|
echo ""
|
||||||
|
echo "Options:"
|
||||||
|
echo " --max-pages N Maximální počet stránek ke stažení z každého zdroje"
|
||||||
|
echo " --max-properties N Maximální počet nemovitostí ke stažení z každého zdroje"
|
||||||
|
echo " --log-level LEVEL Úroveň logování (DEBUG, INFO, WARNING, ERROR)"
|
||||||
|
echo " -h, --help Zobrazí tuto nápovědu"
|
||||||
|
echo ""
|
||||||
|
echo "Examples:"
|
||||||
|
echo " ./run_all.sh # plný běh"
|
||||||
|
echo " ./run_all.sh --max-pages 1 --max-properties 10 # rychlý test"
|
||||||
|
echo " ./run_all.sh --log-level DEBUG # s debug logováním"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
SCRAPER_ARGS=""
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case $1 in
|
||||||
|
-h|--help)
|
||||||
|
show_help
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
--max-pages|--max-properties|--log-level)
|
||||||
|
SCRAPER_ARGS="$SCRAPER_ARGS $1 $2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Unknown argument: $1"
|
||||||
|
echo ""
|
||||||
|
show_help
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
step() {
|
step() {
|
||||||
CURRENT=$((CURRENT + 1))
|
CURRENT=$((CURRENT + 1))
|
||||||
@@ -23,23 +69,25 @@ step() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# ── Scrapery (paralelně kde to jde) ─────────────────────────
|
# ── Scrapery (paralelně kde to jde) ─────────────────────────
|
||||||
|
# Tee all output to log file for status generation
|
||||||
|
exec > >(tee -a "$LOG_FILE") 2>&1
|
||||||
|
|
||||||
step "Sreality"
|
step "Sreality"
|
||||||
python3 scrape_and_map.py || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
python3 scrape_and_map.py $SCRAPER_ARGS || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||||
|
|
||||||
step "Realingo"
|
step "Realingo"
|
||||||
python3 scrape_realingo.py || { echo -e "${RED}✗ Realingo selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
python3 scrape_realingo.py $SCRAPER_ARGS || { echo -e "${RED}✗ Realingo selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||||
|
|
||||||
step "Bezrealitky"
|
step "Bezrealitky"
|
||||||
python3 scrape_bezrealitky.py || { echo -e "${RED}✗ Bezrealitky selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
python3 scrape_bezrealitky.py $SCRAPER_ARGS || { echo -e "${RED}✗ Bezrealitky selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||||
|
|
||||||
step "iDNES Reality"
|
step "iDNES Reality"
|
||||||
python3 scrape_idnes.py || { echo -e "${RED}✗ iDNES selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
python3 scrape_idnes.py $SCRAPER_ARGS || { echo -e "${RED}✗ iDNES selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||||
|
|
||||||
step "PSN + CityHome"
|
step "PSN + CityHome"
|
||||||
python3 scrape_psn.py &
|
python3 scrape_psn.py $SCRAPER_ARGS &
|
||||||
PID_PSN=$!
|
PID_PSN=$!
|
||||||
python3 scrape_cityhome.py &
|
python3 scrape_cityhome.py $SCRAPER_ARGS &
|
||||||
PID_CH=$!
|
PID_CH=$!
|
||||||
wait $PID_PSN || { echo -e "${RED}✗ PSN selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
wait $PID_PSN || { echo -e "${RED}✗ PSN selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||||
wait $PID_CH || { echo -e "${RED}✗ CityHome selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
wait $PID_CH || { echo -e "${RED}✗ CityHome selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||||
@@ -51,6 +99,12 @@ python3 merge_and_map.py || { echo -e "${RED}✗ Merge selhal${NC}"; FAILED=$((F
|
|||||||
|
|
||||||
# ── Otevření mapy ────────────────────────────────────────────
|
# ── Otevření mapy ────────────────────────────────────────────
|
||||||
|
|
||||||
|
# ── Generování statusu ─────────────────────────────────────
|
||||||
|
|
||||||
|
END_EPOCH=$(date +%s)
|
||||||
|
DURATION=$((END_EPOCH - START_EPOCH))
|
||||||
|
python3 generate_status.py "$START_TIME" "$DURATION" "$LOG_FILE"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "============================================================"
|
echo "============================================================"
|
||||||
if [ $FAILED -eq 0 ]; then
|
if [ $FAILED -eq 0 ]; then
|
||||||
@@ -60,4 +114,4 @@ else
|
|||||||
fi
|
fi
|
||||||
echo "============================================================"
|
echo "============================================================"
|
||||||
|
|
||||||
open mapa_bytu.html
|
command -v open &>/dev/null && open mapa_bytu.html || true
|
||||||
|
|||||||
@@ -6,7 +6,9 @@ Hledá byty na prodej v Praze podle zadaných kritérií a generuje HTML mapu.
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import math
|
import math
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
@@ -14,6 +16,8 @@ import urllib.parse
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Konfigurace filtrů ──────────────────────────────────────────────────────
|
# ── Konfigurace filtrů ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
MAX_PRICE = 13_500_000 # Kč
|
MAX_PRICE = 13_500_000 # Kč
|
||||||
@@ -39,9 +43,18 @@ HEADERS = {
|
|||||||
|
|
||||||
def api_get(url: str) -> dict:
|
def api_get(url: str) -> dict:
|
||||||
"""Fetch JSON from Sreality API."""
|
"""Fetch JSON from Sreality API."""
|
||||||
|
logger.debug(f"HTTP GET request: {url}")
|
||||||
|
logger.debug(f"Headers: {HEADERS}")
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
req = urllib.request.Request(url, headers=HEADERS)
|
||||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
try:
|
||||||
return json.loads(resp.read().decode("utf-8"))
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
response_data = resp.read().decode("utf-8")
|
||||||
|
logger.debug(f"HTTP response: status={resp.status}, size={len(response_data)} bytes")
|
||||||
|
logger.debug(f"Response preview: {response_data[:200]}")
|
||||||
|
return json.loads(response_data)
|
||||||
|
except (urllib.error.URLError, ConnectionError, OSError) as e:
|
||||||
|
logger.error(f"HTTP request failed for {url}: {e}", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
def build_list_url(disposition: int, page: int = 1) -> str:
|
def build_list_url(disposition: int, page: int = 1) -> str:
|
||||||
@@ -59,20 +72,26 @@ def build_list_url(disposition: int, page: int = 1) -> str:
|
|||||||
return f"{API_BASE}?{urllib.parse.urlencode(params)}"
|
return f"{API_BASE}?{urllib.parse.urlencode(params)}"
|
||||||
|
|
||||||
|
|
||||||
def fetch_estates_for_disposition(disposition: int) -> list[dict]:
|
def fetch_estates_for_disposition(disposition: int, max_pages: int | None = None) -> list[dict]:
|
||||||
"""Fetch all estates for a given disposition, handling pagination."""
|
"""Fetch all estates for a given disposition, handling pagination."""
|
||||||
url = build_list_url(disposition, page=1)
|
url = build_list_url(disposition, page=1)
|
||||||
print(f" Fetching disposition {disposition}, page 1 ...")
|
logger.info(f"Fetching disposition {disposition}, page 1 ...")
|
||||||
data = api_get(url)
|
data = api_get(url)
|
||||||
total = data.get("result_size", 0)
|
total = data.get("result_size", 0)
|
||||||
estates = data.get("_embedded", {}).get("estates", [])
|
estates = data.get("_embedded", {}).get("estates", [])
|
||||||
total_pages = math.ceil(total / PER_PAGE) if total > 0 else 0
|
total_pages = math.ceil(total / PER_PAGE) if total > 0 else 0
|
||||||
|
|
||||||
print(f" → {total} results, {total_pages} pages")
|
logger.info(f"→ {total} results, {total_pages} pages")
|
||||||
|
|
||||||
|
# Limit pages if max_pages is specified
|
||||||
|
if max_pages is not None:
|
||||||
|
original_pages = total_pages
|
||||||
|
total_pages = min(total_pages, max_pages)
|
||||||
|
logger.debug(f"Max pages limit reached: limiting {original_pages} pages to {total_pages}")
|
||||||
|
|
||||||
for page in range(2, total_pages + 1):
|
for page in range(2, total_pages + 1):
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
print(f" Fetching page {page}/{total_pages} ...")
|
logger.info(f"Fetching page {page}/{total_pages} ...")
|
||||||
url = build_list_url(disposition, page=page)
|
url = build_list_url(disposition, page=page)
|
||||||
data = api_get(url)
|
data = api_get(url)
|
||||||
estates.extend(data.get("_embedded", {}).get("estates", []))
|
estates.extend(data.get("_embedded", {}).get("estates", []))
|
||||||
@@ -84,9 +103,12 @@ def get_estate_detail(hash_id: int) -> dict | None:
|
|||||||
"""Fetch detail for a single estate to get floor info and building type."""
|
"""Fetch detail for a single estate to get floor info and building type."""
|
||||||
try:
|
try:
|
||||||
url = DETAIL_API.format(hash_id)
|
url = DETAIL_API.format(hash_id)
|
||||||
return api_get(url)
|
logger.debug(f"Fetching detail for hash_id={hash_id}")
|
||||||
|
detail = api_get(url)
|
||||||
|
logger.debug(f"Detail fetched for hash_id={hash_id}, keys: {list(detail.keys())[:5]}")
|
||||||
|
return detail
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" Warning: Could not fetch detail for {hash_id}: {e}")
|
logger.warning(f"Could not fetch detail for hash_id={hash_id}: {e}", exc_info=True)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -185,24 +207,28 @@ def load_cache(json_path: str = "byty_sreality.json") -> dict[int, dict]:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def scrape(max_pages: int | None = None, max_properties: int | None = None):
|
||||||
"""Main scraping function. Returns list of filtered estates."""
|
"""Main scraping function. Returns list of filtered estates."""
|
||||||
all_estates_raw = []
|
all_estates_raw = []
|
||||||
cache = load_cache()
|
cache = load_cache()
|
||||||
|
|
||||||
print("=" * 60)
|
logger.info("=" * 60)
|
||||||
print("Stahuji inzeráty ze Sreality.cz")
|
logger.info("Stahuji inzeráty ze Sreality.cz")
|
||||||
print(f"Cena: do {format_price(MAX_PRICE)}")
|
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
|
||||||
print(f"Dispozice: {', '.join(disposition_label(d) for d in DISPOSITIONS)}")
|
logger.info(f"Dispozice: {', '.join(disposition_label(d) for d in DISPOSITIONS)}")
|
||||||
print(f"Patro: od {MIN_FLOOR}. NP")
|
logger.info(f"Patro: od {MIN_FLOOR}. NP")
|
||||||
print(f"Region: Praha")
|
logger.info(f"Region: Praha")
|
||||||
if cache:
|
if cache:
|
||||||
print(f"Cache: {len(cache)} bytů z minulého běhu")
|
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
|
||||||
print("=" * 60)
|
if max_pages:
|
||||||
|
logger.info(f"Limit stran: {max_pages}")
|
||||||
|
if max_properties:
|
||||||
|
logger.info(f"Limit majetků: {max_properties}")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
for disp in DISPOSITIONS:
|
for disp in DISPOSITIONS:
|
||||||
print(f"\n▸ Dispozice: {disposition_label(disp)}")
|
logger.info(f"\n▸ Dispozice: {disposition_label(disp)}")
|
||||||
estates = fetch_estates_for_disposition(disp)
|
estates = fetch_estates_for_disposition(disp, max_pages=max_pages)
|
||||||
for e in estates:
|
for e in estates:
|
||||||
e["_disposition_cb"] = disp
|
e["_disposition_cb"] = disp
|
||||||
all_estates_raw.extend(estates)
|
all_estates_raw.extend(estates)
|
||||||
@@ -217,10 +243,10 @@ def scrape():
|
|||||||
seen.add(hid)
|
seen.add(hid)
|
||||||
unique_estates.append(e)
|
unique_estates.append(e)
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
logger.info(f"\n{'=' * 60}")
|
||||||
print(f"Staženo celkem: {len(unique_estates)} unikátních inzerátů")
|
logger.info(f"Staženo celkem: {len(unique_estates)} unikátních inzerátů")
|
||||||
print(f"Stahuji detaily pro filtrování panelu/sídlišť...")
|
logger.info(f"Stahuji detaily pro filtrování panelu/sídlišť...")
|
||||||
print(f"{'=' * 60}")
|
logger.info(f"{'=' * 60}")
|
||||||
|
|
||||||
# Fetch details and filter
|
# Fetch details and filter
|
||||||
results = []
|
results = []
|
||||||
@@ -229,19 +255,26 @@ def scrape():
|
|||||||
excluded_no_detail = 0
|
excluded_no_detail = 0
|
||||||
excluded_small = 0
|
excluded_small = 0
|
||||||
cache_hits = 0
|
cache_hits = 0
|
||||||
|
details_fetched = 0
|
||||||
|
|
||||||
for i, estate in enumerate(unique_estates):
|
for i, estate in enumerate(unique_estates):
|
||||||
|
# Stop if max_properties reached
|
||||||
|
if max_properties is not None and details_fetched >= max_properties:
|
||||||
|
logger.debug(f"Max properties limit reached: {max_properties}")
|
||||||
|
break
|
||||||
hash_id = estate.get("hash_id")
|
hash_id = estate.get("hash_id")
|
||||||
gps = estate.get("gps", {})
|
gps = estate.get("gps", {})
|
||||||
|
|
||||||
if not gps or not gps.get("lat") or not gps.get("lon"):
|
if not gps or not gps.get("lat") or not gps.get("lon"):
|
||||||
excluded_no_gps += 1
|
excluded_no_gps += 1
|
||||||
|
logger.debug(f"Filter: hash_id={hash_id} - excluded (no GPS)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check cache — if hash_id exists and price unchanged, reuse
|
# Check cache — if hash_id exists and price unchanged, reuse
|
||||||
cached = cache.get(hash_id)
|
cached = cache.get(hash_id)
|
||||||
if cached and cached.get("price") == estate.get("price", 0):
|
if cached and cached.get("price") == estate.get("price", 0):
|
||||||
cache_hits += 1
|
cache_hits += 1
|
||||||
|
logger.debug(f"Cache hit for hash_id={hash_id}")
|
||||||
results.append(cached)
|
results.append(cached)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -250,13 +283,15 @@ def scrape():
|
|||||||
detail = get_estate_detail(hash_id)
|
detail = get_estate_detail(hash_id)
|
||||||
if not detail:
|
if not detail:
|
||||||
excluded_no_detail += 1
|
excluded_no_detail += 1
|
||||||
|
logger.debug(f"Filter: hash_id={hash_id} - excluded (no detail)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check panel / sídliště
|
# Check panel / sídliště
|
||||||
is_excluded, reason = is_panel_or_sidliste(detail)
|
is_excluded, reason = is_panel_or_sidliste(detail)
|
||||||
if is_excluded:
|
if is_excluded:
|
||||||
excluded_panel += 1
|
excluded_panel += 1
|
||||||
print(f" ✗ Vyloučen #{hash_id}: {reason}")
|
logger.debug(f"Filter: hash_id={hash_id} - excluded (panel/sídliště): {reason}")
|
||||||
|
logger.info(f"✗ Vyloučen #{hash_id}: {reason}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Parse floor
|
# Parse floor
|
||||||
@@ -276,7 +311,8 @@ def scrape():
|
|||||||
# Filter by minimum area
|
# Filter by minimum area
|
||||||
if area is not None and area < MIN_AREA:
|
if area is not None and area < MIN_AREA:
|
||||||
excluded_small += 1
|
excluded_small += 1
|
||||||
print(f" ✗ Vyloučen #{hash_id}: malá plocha ({area} m²)")
|
logger.debug(f"Filter: hash_id={hash_id} - excluded (area {area} m² < {MIN_AREA} m²)")
|
||||||
|
logger.info(f"✗ Vyloučen #{hash_id}: malá plocha ({area} m²)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Get building type
|
# Get building type
|
||||||
@@ -311,22 +347,24 @@ def scrape():
|
|||||||
"ownership": ownership,
|
"ownership": ownership,
|
||||||
"url": sreality_url(hash_id, seo),
|
"url": sreality_url(hash_id, seo),
|
||||||
"image": (estate.get("_links", {}).get("images", [{}])[0].get("href", "") if estate.get("_links", {}).get("images") else ""),
|
"image": (estate.get("_links", {}).get("images", [{}])[0].get("href", "") if estate.get("_links", {}).get("images") else ""),
|
||||||
|
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
details_fetched += 1
|
||||||
|
|
||||||
if (i + 1) % 20 == 0:
|
if (i + 1) % 20 == 0:
|
||||||
print(f" Zpracováno {i + 1}/{len(unique_estates)} ...")
|
logger.info(f"Zpracováno {i + 1}/{len(unique_estates)} ...")
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
logger.info(f"\n{'=' * 60}")
|
||||||
print(f"Výsledky:")
|
logger.info(f"Výsledky:")
|
||||||
print(f" Celkem staženo: {len(unique_estates)}")
|
logger.info(f" Celkem staženo: {len(unique_estates)}")
|
||||||
print(f" Z cache (přeskočeno): {cache_hits}")
|
logger.info(f" Z cache (přeskočeno): {cache_hits}")
|
||||||
print(f" Vyloučeno (panel/síd): {excluded_panel}")
|
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
|
||||||
print(f" Vyloučeno (<{MIN_AREA} m²): {excluded_small}")
|
logger.info(f" Vyloučeno (<{MIN_AREA} m²): {excluded_small}")
|
||||||
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
||||||
print(f" Vyloučeno (bez detailu): {excluded_no_detail}")
|
logger.info(f" Vyloučeno (bez detailu): {excluded_no_detail}")
|
||||||
print(f" ✓ Vyhovující byty: {len(results)}")
|
logger.info(f" ✓ Vyhovující byty: {len(results)}")
|
||||||
print(f"{'=' * 60}")
|
logger.info(f"{'=' * 60}")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -336,20 +374,58 @@ def scrape():
|
|||||||
def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
||||||
"""Generate an interactive Leaflet.js HTML map."""
|
"""Generate an interactive Leaflet.js HTML map."""
|
||||||
|
|
||||||
# Color by disposition
|
# Color by price per m² — cool blue→warm red scale, no yellow
|
||||||
color_map = {
|
# Thresholds based on Prague market distribution (p25=120k, p50=144k, p75=162k)
|
||||||
"3+kk": "#2196F3", # blue
|
price_color_scale = [
|
||||||
"3+1": "#4CAF50", # green
|
(110_000, "#1565C0"), # < 110k/m² → deep blue (levné)
|
||||||
"4+kk": "#FF9800", # orange
|
(130_000, "#42A5F5"), # 110–130k → light blue
|
||||||
"4+1": "#F44336", # red
|
(150_000, "#66BB6A"), # 130–150k → green (střed)
|
||||||
"5+kk": "#9C27B0", # purple
|
(165_000, "#EF6C00"), # 150–165k → dark orange
|
||||||
"5+1": "#795548", # brown
|
(float("inf"), "#C62828"), # > 165k → dark red (drahé)
|
||||||
"6+": "#607D8B", # grey-blue
|
]
|
||||||
}
|
|
||||||
|
def price_color(estate: dict) -> str:
|
||||||
|
price = estate.get("price") or 0
|
||||||
|
area = estate.get("area") or 0
|
||||||
|
if not area:
|
||||||
|
return "#9E9E9E"
|
||||||
|
ppm2 = price / area
|
||||||
|
for threshold, color in price_color_scale:
|
||||||
|
if ppm2 < threshold:
|
||||||
|
return color
|
||||||
|
return "#E53935"
|
||||||
|
|
||||||
|
# Legend bands for info panel (built once)
|
||||||
|
price_legend_items = (
|
||||||
|
'<div style="margin-bottom:4px;font-size:12px;color:#555;font-weight:600;">Cena / m²:</div>'
|
||||||
|
)
|
||||||
|
bands = [
|
||||||
|
("#1565C0", "< 110 000 Kč/m²"),
|
||||||
|
("#42A5F5", "110 – 130 000 Kč/m²"),
|
||||||
|
("#66BB6A", "130 – 150 000 Kč/m²"),
|
||||||
|
("#EF6C00", "150 – 165 000 Kč/m²"),
|
||||||
|
("#C62828", "> 165 000 Kč/m²"),
|
||||||
|
("#9E9E9E", "cena/plocha neuvedena"),
|
||||||
|
]
|
||||||
|
for bcolor, blabel in bands:
|
||||||
|
price_legend_items += (
|
||||||
|
f'<div style="display:flex;align-items:center;gap:6px;margin:2px 0;">'
|
||||||
|
f'<span style="width:14px;height:14px;border-radius:50%;background:{bcolor};'
|
||||||
|
f'display:inline-block;border:2px solid white;box-shadow:0 1px 3px rgba(0,0,0,0.3);flex-shrink:0;"></span>'
|
||||||
|
f'<span>{blabel}</span></div>'
|
||||||
|
)
|
||||||
|
# New marker indicator — bigger dot, no extra border
|
||||||
|
price_legend_items += (
|
||||||
|
'<div style="display:flex;align-items:center;gap:6px;margin:6px 0 0 0;'
|
||||||
|
'padding-top:6px;border-top:1px solid #eee;">'
|
||||||
|
'<span style="width:18px;height:18px;border-radius:50%;background:#66BB6A;'
|
||||||
|
'display:inline-block;box-shadow:0 1px 4px rgba(0,0,0,0.35);flex-shrink:0;"></span>'
|
||||||
|
'<span>Nové (z dnešního scrapu) — větší</span></div>'
|
||||||
|
)
|
||||||
|
|
||||||
markers_js = ""
|
markers_js = ""
|
||||||
for e in estates:
|
for e in estates:
|
||||||
color = color_map.get(e["disposition"], "#999999")
|
color = price_color(e)
|
||||||
floor_text = f'{e["floor"]}. NP' if e["floor"] else "neuvedeno"
|
floor_text = f'{e["floor"]}. NP' if e["floor"] else "neuvedeno"
|
||||||
area_text = f'{e["area"]} m²' if e["area"] else "neuvedeno"
|
area_text = f'{e["area"]} m²' if e["area"] else "neuvedeno"
|
||||||
building_text = e["building_type"] or "neuvedeno"
|
building_text = e["building_type"] or "neuvedeno"
|
||||||
@@ -368,11 +444,19 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
|||||||
|
|
||||||
hash_id = e.get("hash_id", "")
|
hash_id = e.get("hash_id", "")
|
||||||
|
|
||||||
|
scraped_at = e.get("scraped_at", "")
|
||||||
|
is_new = scraped_at == datetime.now().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
new_badge = (
|
||||||
|
'<span style="margin-left:6px;font-size:11px;background:#FFD600;color:#333;'
|
||||||
|
'padding:1px 6px;border-radius:3px;font-weight:bold;">NOVÉ</span>'
|
||||||
|
if is_new else ""
|
||||||
|
)
|
||||||
popup = (
|
popup = (
|
||||||
f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}">'
|
f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}">'
|
||||||
f'<b style="font-size:14px;">{format_price(e["price"])}</b>'
|
f'<b style="font-size:14px;">{format_price(e["price"])}</b>'
|
||||||
f'<span style="margin-left:8px;font-size:11px;background:{source_color};color:white;'
|
f'<span style="margin-left:8px;font-size:11px;background:{source_color};color:white;'
|
||||||
f'padding:1px 6px;border-radius:3px;">{source_label}</span><br>'
|
f'padding:1px 6px;border-radius:3px;">{source_label}</span>{new_badge}<br>'
|
||||||
f'<span style="color:#666;">{e["disposition"]} | {area_text} | {floor_text}</span>'
|
f'<span style="color:#666;">{e["disposition"]} | {area_text} | {floor_text}</span>'
|
||||||
f'{floor_note}<br><br>'
|
f'{floor_note}<br><br>'
|
||||||
f'<b>{e["locality"]}</b><br>'
|
f'<b>{e["locality"]}</b><br>'
|
||||||
@@ -401,27 +485,33 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
|||||||
popup = popup.replace("'", "\\'").replace("\n", "")
|
popup = popup.replace("'", "\\'").replace("\n", "")
|
||||||
|
|
||||||
is_fav = source in ("psn", "cityhome")
|
is_fav = source in ("psn", "cityhome")
|
||||||
marker_fn = "addHeartMarker" if is_fav else "addMarker"
|
|
||||||
|
if is_fav:
|
||||||
|
marker_fn = "addHeartMarker"
|
||||||
|
elif is_new:
|
||||||
|
marker_fn = "addNewMarker"
|
||||||
|
else:
|
||||||
|
marker_fn = "addMarker"
|
||||||
markers_js += (
|
markers_js += (
|
||||||
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}');\n"
|
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}');\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build legend
|
# Build legend — price per m² bands + disposition counts
|
||||||
legend_items = ""
|
legend_items = price_legend_items
|
||||||
|
|
||||||
|
# Disposition counts below the color legend
|
||||||
disp_counts = {}
|
disp_counts = {}
|
||||||
for e in estates:
|
for e in estates:
|
||||||
d = e["disposition"]
|
d = e["disposition"]
|
||||||
disp_counts[d] = disp_counts.get(d, 0) + 1
|
disp_counts[d] = disp_counts.get(d, 0) + 1
|
||||||
for disp, color in color_map.items():
|
disp_order = ["3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+"]
|
||||||
count = disp_counts.get(disp, 0)
|
disp_summary = ", ".join(
|
||||||
if count > 0:
|
f"{d} ({disp_counts[d]})" for d in disp_order if d in disp_counts
|
||||||
legend_items += (
|
)
|
||||||
f'<div style="display:flex;align-items:center;gap:6px;margin:3px 0;">'
|
legend_items += (
|
||||||
f'<span style="width:14px;height:14px;border-radius:50%;'
|
f'<div style="margin-top:8px;padding-top:6px;border-top:1px solid #eee;'
|
||||||
f'background:{color};display:inline-block;border:2px solid white;'
|
f'font-size:12px;color:#666;">{disp_summary}</div>'
|
||||||
f'box-shadow:0 1px 3px rgba(0,0,0,0.3);"></span>'
|
)
|
||||||
f'<span>{disp} ({count})</span></div>'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Heart marker legend for PSN/CityHome
|
# Heart marker legend for PSN/CityHome
|
||||||
fav_count = sum(1 for e in estates if e.get("source") in ("psn", "cityhome"))
|
fav_count = sum(1 for e in estates if e.get("source") in ("psn", "cityhome"))
|
||||||
@@ -456,6 +546,7 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
|||||||
body {{ font-family: system-ui, -apple-system, sans-serif; }}
|
body {{ font-family: system-ui, -apple-system, sans-serif; }}
|
||||||
#map {{ width: 100%; height: 100vh; }}
|
#map {{ width: 100%; height: 100vh; }}
|
||||||
.heart-icon {{ background: none !important; border: none !important; }}
|
.heart-icon {{ background: none !important; border: none !important; }}
|
||||||
|
.star-icon {{ background: none !important; border: none !important; }}
|
||||||
.rate-btn:hover {{ background: #f0f0f0 !important; }}
|
.rate-btn:hover {{ background: #f0f0f0 !important; }}
|
||||||
.rate-btn.active-fav {{ background: #FFF9C4 !important; border-color: #FFC107 !important; }}
|
.rate-btn.active-fav {{ background: #FFF9C4 !important; border-color: #FFC107 !important; }}
|
||||||
.rate-btn.active-rej {{ background: #FFEBEE !important; border-color: #F44336 !important; }}
|
.rate-btn.active-rej {{ background: #FFEBEE !important; border-color: #F44336 !important; }}
|
||||||
@@ -466,13 +557,42 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
|||||||
}}
|
}}
|
||||||
.marker-favorite {{ animation: pulse-glow 2s ease-in-out infinite; border-radius: 50%; }}
|
.marker-favorite {{ animation: pulse-glow 2s ease-in-out infinite; border-radius: 50%; }}
|
||||||
.heart-icon-fav svg path {{ stroke: gold !important; stroke-width: 2.5 !important; filter: drop-shadow(0 0 4px rgba(255,193,7,0.7)); }}
|
.heart-icon-fav svg path {{ stroke: gold !important; stroke-width: 2.5 !important; filter: drop-shadow(0 0 4px rgba(255,193,7,0.7)); }}
|
||||||
.heart-icon-rej {{ opacity: 0.2 !important; }}
|
.heart-icon-rej {{ opacity: 0.4 !important; filter: grayscale(1); }}
|
||||||
|
.reject-overlay {{ background: none !important; border: none !important; pointer-events: none !important; }}
|
||||||
|
@keyframes pulse-new {{
|
||||||
|
0% {{ stroke-opacity: 1; stroke-width: 3px; r: 11; }}
|
||||||
|
50% {{ stroke-opacity: 0.4; stroke-width: 6px; r: 12; }}
|
||||||
|
100% {{ stroke-opacity: 1; stroke-width: 3px; r: 11; }}
|
||||||
|
}}
|
||||||
|
.marker-new {{ animation: pulse-new 2s ease-in-out infinite; }}
|
||||||
.info-panel {{
|
.info-panel {{
|
||||||
position: absolute; top: 10px; right: 10px; z-index: 1000;
|
position: absolute; top: 10px; right: 10px; z-index: 1000;
|
||||||
background: white; padding: 16px; border-radius: 10px;
|
background: white; padding: 16px; border-radius: 10px;
|
||||||
box-shadow: 0 2px 12px rgba(0,0,0,0.15); max-width: 260px;
|
box-shadow: 0 2px 12px rgba(0,0,0,0.15); max-width: 260px;
|
||||||
font-size: 13px; line-height: 1.5;
|
font-size: 13px; line-height: 1.5;
|
||||||
|
transition: transform 0.3s ease, opacity 0.3s ease;
|
||||||
}}
|
}}
|
||||||
|
.info-panel.collapsed {{
|
||||||
|
transform: translateX(calc(100% + 20px));
|
||||||
|
opacity: 0; pointer-events: none;
|
||||||
|
}}
|
||||||
|
.panel-open-btn {{
|
||||||
|
position: absolute; top: 10px; right: 10px; z-index: 1001;
|
||||||
|
width: 40px; height: 40px; border-radius: 8px;
|
||||||
|
background: white; border: none; cursor: pointer;
|
||||||
|
box-shadow: 0 2px 12px rgba(0,0,0,0.15);
|
||||||
|
font-size: 20px; display: flex; align-items: center; justify-content: center;
|
||||||
|
transition: opacity 0.3s ease;
|
||||||
|
}}
|
||||||
|
.panel-open-btn.hidden {{ opacity: 0; pointer-events: none; }}
|
||||||
|
.panel-close-btn {{
|
||||||
|
position: absolute; top: 8px; right: 8px;
|
||||||
|
width: 28px; height: 28px; border-radius: 6px;
|
||||||
|
background: none; border: 1px solid #ddd; cursor: pointer;
|
||||||
|
font-size: 16px; display: flex; align-items: center; justify-content: center;
|
||||||
|
color: #888;
|
||||||
|
}}
|
||||||
|
.panel-close-btn:hover {{ background: #f0f0f0; color: #333; }}
|
||||||
.info-panel h2 {{ font-size: 16px; margin-bottom: 8px; }}
|
.info-panel h2 {{ font-size: 16px; margin-bottom: 8px; }}
|
||||||
.info-panel .stats {{ color: #666; margin-bottom: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee; }}
|
.info-panel .stats {{ color: #666; margin-bottom: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee; }}
|
||||||
.filter-section {{ margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; }}
|
.filter-section {{ margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; }}
|
||||||
@@ -480,18 +600,26 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
|||||||
.filter-section input[type="checkbox"] {{ accent-color: #1976D2; }}
|
.filter-section input[type="checkbox"] {{ accent-color: #1976D2; }}
|
||||||
#floor-filter {{ margin-top: 8px; }}
|
#floor-filter {{ margin-top: 8px; }}
|
||||||
#floor-filter select {{ width: 100%; padding: 4px; border-radius: 4px; border: 1px solid #ccc; }}
|
#floor-filter select {{ width: 100%; padding: 4px; border-radius: 4px; border: 1px solid #ccc; }}
|
||||||
|
.status-link {{ display: block; margin-top: 10px; padding-top: 10px; border-top: 1px solid #eee; text-align: center; }}
|
||||||
|
.status-link a {{ color: #1976D2; text-decoration: none; font-size: 12px; }}
|
||||||
|
@media (max-width: 600px) {{
|
||||||
|
.info-panel {{ max-width: calc(100vw - 60px); right: 10px; }}
|
||||||
|
.info-panel.collapsed {{ transform: translateX(calc(100% + 20px)); }}
|
||||||
|
.panel-close-btn {{ top: 6px; right: 6px; }}
|
||||||
|
}}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="map"></div>
|
<div id="map"></div>
|
||||||
<div class="info-panel">
|
<button class="panel-open-btn hidden" id="panel-open-btn" onclick="togglePanel()">☰</button>
|
||||||
|
<div class="info-panel" id="info-panel">
|
||||||
|
<button class="panel-close-btn" id="panel-close-btn" onclick="togglePanel()">✕</button>
|
||||||
<h2>Byty v Praze</h2>
|
<h2>Byty v Praze</h2>
|
||||||
<div class="stats">
|
<div class="stats">
|
||||||
<div>Celkem: <b id="visible-count">{len(estates)}</b> bytů</div>
|
<div>Celkem: <b id="visible-count">{len(estates)}</b> bytů</div>
|
||||||
<div>Cena: {min_price} — {max_price}</div>
|
<div>Cena: {min_price} — {max_price}</div>
|
||||||
<div>Průměr: {avg_price}</div>
|
<div>Průměr: {avg_price}</div>
|
||||||
</div>
|
</div>
|
||||||
<div><b>Dispozice:</b></div>
|
|
||||||
{legend_items}
|
{legend_items}
|
||||||
<div class="filter-section">
|
<div class="filter-section">
|
||||||
<b>Filtry:</b>
|
<b>Filtry:</b>
|
||||||
@@ -525,6 +653,7 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
|
|||||||
Skrýt zamítnuté
|
Skrýt zamítnuté
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="status-link"><a href="status.html">Scraper status</a></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
@@ -560,6 +689,23 @@ function addMarker(lat, lon, color, popup, hashId) {{
|
|||||||
marker.addTo(map);
|
marker.addTo(map);
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
function addNewMarker(lat, lon, color, popup, hashId) {{
|
||||||
|
var marker = L.circleMarker([lat, lon], {{
|
||||||
|
radius: 12,
|
||||||
|
fillColor: color,
|
||||||
|
color: color,
|
||||||
|
weight: 4,
|
||||||
|
opacity: 0.35,
|
||||||
|
fillOpacity: 0.95,
|
||||||
|
}}).bindPopup(popup);
|
||||||
|
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isNew: true }};
|
||||||
|
allMarkers.push(marker);
|
||||||
|
marker.addTo(map);
|
||||||
|
marker.on('add', function() {{
|
||||||
|
if (marker._path) marker._path.classList.add('marker-new');
|
||||||
|
}});
|
||||||
|
}}
|
||||||
|
|
||||||
function heartIcon(color) {{
|
function heartIcon(color) {{
|
||||||
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">'
|
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">'
|
||||||
+ '<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
|
+ '<path d="M12 21.35l-1.45-1.32C5.4 15.36 2 12.28 2 8.5 '
|
||||||
@@ -575,6 +721,21 @@ function heartIcon(color) {{
|
|||||||
}});
|
}});
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
function starIcon() {{
|
||||||
|
var svg = '<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" viewBox="0 0 24 24">'
|
||||||
|
+ '<path d="M12 2l3.09 6.26L22 9.27l-5 4.87L18.18 22 12 18.27 '
|
||||||
|
+ '5.82 22 7 14.14 2 9.27l6.91-1.01L12 2z" '
|
||||||
|
+ 'fill="#FFC107" stroke="#F57F17" stroke-width="1" '
|
||||||
|
+ 'filter="drop-shadow(0 1px 3px rgba(0,0,0,0.3))"/></svg>';
|
||||||
|
return L.divIcon({{
|
||||||
|
html: svg,
|
||||||
|
className: 'star-icon',
|
||||||
|
iconSize: [28, 28],
|
||||||
|
iconAnchor: [14, 14],
|
||||||
|
popupAnchor: [0, -14],
|
||||||
|
}});
|
||||||
|
}}
|
||||||
|
|
||||||
function addHeartMarker(lat, lon, color, popup, hashId) {{
|
function addHeartMarker(lat, lon, color, popup, hashId) {{
|
||||||
var marker = L.marker([lat, lon], {{
|
var marker = L.marker([lat, lon], {{
|
||||||
icon: heartIcon(color),
|
icon: heartIcon(color),
|
||||||
@@ -600,6 +761,36 @@ function saveRatings(ratings) {{
|
|||||||
localStorage.setItem(RATINGS_KEY, JSON.stringify(ratings));
|
localStorage.setItem(RATINGS_KEY, JSON.stringify(ratings));
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
function addRejectStrike(marker) {{
|
||||||
|
removeRejectStrike(marker);
|
||||||
|
var color = marker._data.color || '#999';
|
||||||
|
// SVG "no entry" icon — circle with diagonal line, colored to match marker
|
||||||
|
var svg = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20">'
|
||||||
|
+ '<circle cx="12" cy="12" r="10" fill="none" stroke="' + color + '" stroke-width="2.5" opacity="0.85"/>'
|
||||||
|
+ '<line x1="5.5" y1="5.5" x2="18.5" y2="18.5" stroke="' + color + '" stroke-width="2.5" stroke-linecap="round" opacity="0.85"/>'
|
||||||
|
+ '</svg>';
|
||||||
|
var icon = L.divIcon({{
|
||||||
|
className: 'reject-overlay',
|
||||||
|
html: svg,
|
||||||
|
iconSize: [20, 20],
|
||||||
|
iconAnchor: [10, 10],
|
||||||
|
}});
|
||||||
|
var m = L.marker([marker._data.lat, marker._data.lon], {{
|
||||||
|
icon: icon,
|
||||||
|
interactive: false,
|
||||||
|
pane: 'markerPane',
|
||||||
|
}});
|
||||||
|
m.addTo(map);
|
||||||
|
marker._rejectStrike = m;
|
||||||
|
}}
|
||||||
|
|
||||||
|
function removeRejectStrike(marker) {{
|
||||||
|
if (marker._rejectStrike) {{
|
||||||
|
map.removeLayer(marker._rejectStrike);
|
||||||
|
marker._rejectStrike = null;
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
|
||||||
function applyMarkerStyle(marker, status) {{
|
function applyMarkerStyle(marker, status) {{
|
||||||
if (marker._data.isHeart) {{
|
if (marker._data.isHeart) {{
|
||||||
var el = marker._icon;
|
var el = marker._icon;
|
||||||
@@ -614,26 +805,59 @@ function applyMarkerStyle(marker, status) {{
|
|||||||
}}
|
}}
|
||||||
}} else {{
|
}} else {{
|
||||||
if (status === 'fav') {{
|
if (status === 'fav') {{
|
||||||
marker.setStyle({{
|
removeRejectStrike(marker);
|
||||||
radius: 12, fillOpacity: 1, weight: 3,
|
if (!marker._data._origCircle) marker._data._origCircle = true;
|
||||||
fillColor: marker._data.color, color: '#fff',
|
var popup = marker.getPopup();
|
||||||
}});
|
var popupContent = popup ? popup.getContent() : '';
|
||||||
if (marker._path) marker._path.classList.add('marker-favorite');
|
var wasOnMap = map.hasLayer(marker);
|
||||||
|
if (wasOnMap) map.removeLayer(marker);
|
||||||
|
var starMarker = L.marker([marker._data.lat, marker._data.lon], {{
|
||||||
|
icon: starIcon(),
|
||||||
|
}}).bindPopup(popupContent);
|
||||||
|
starMarker._data = marker._data;
|
||||||
|
var idx = allMarkers.indexOf(marker);
|
||||||
|
if (idx !== -1) allMarkers[idx] = starMarker;
|
||||||
|
if (wasOnMap) starMarker.addTo(map);
|
||||||
}} else if (status === 'reject') {{
|
}} else if (status === 'reject') {{
|
||||||
marker.setStyle({{
|
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
|
||||||
radius: 6, fillOpacity: 0.15, fillColor: '#999', color: '#bbb', weight: 1,
|
revertToCircle(marker, {{ radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1 }});
|
||||||
}});
|
}} else {{
|
||||||
if (marker._path) marker._path.classList.remove('marker-favorite');
|
marker.setStyle({{
|
||||||
|
radius: 6, fillOpacity: 0.35, fillColor: marker._data.color, color: '#fff', weight: 1,
|
||||||
|
}});
|
||||||
|
if (marker._path) marker._path.classList.remove('marker-favorite');
|
||||||
|
}}
|
||||||
|
// Add strikethrough line over the marker
|
||||||
|
addRejectStrike(marker);
|
||||||
}} else {{
|
}} else {{
|
||||||
marker.setStyle({{
|
if (marker._data._origCircle && !(marker instanceof L.CircleMarker)) {{
|
||||||
radius: 8, fillColor: marker._data.color, color: '#fff',
|
revertToCircle(marker, {{ radius: 8, fillColor: marker._data.color, color: '#fff', weight: 2, fillOpacity: 0.85 }});
|
||||||
weight: 2, fillOpacity: 0.85,
|
}} else {{
|
||||||
}});
|
marker.setStyle({{
|
||||||
if (marker._path) marker._path.classList.remove('marker-favorite');
|
radius: 8, fillColor: marker._data.color, color: '#fff',
|
||||||
|
weight: 2, fillOpacity: 0.85,
|
||||||
|
}});
|
||||||
|
if (marker._path) marker._path.classList.remove('marker-favorite');
|
||||||
|
}}
|
||||||
|
if (marker._path) marker._path.classList.remove('marker-rejected');
|
||||||
|
removeRejectStrike(marker);
|
||||||
}}
|
}}
|
||||||
}}
|
}}
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
function revertToCircle(marker, style) {{
|
||||||
|
var popup = marker.getPopup();
|
||||||
|
var popupContent = popup ? popup.getContent() : '';
|
||||||
|
var wasOnMap = map.hasLayer(marker);
|
||||||
|
if (wasOnMap) map.removeLayer(marker);
|
||||||
|
var cm = L.circleMarker([marker._data.lat, marker._data.lon], style).bindPopup(popupContent);
|
||||||
|
cm._data = marker._data;
|
||||||
|
delete cm._data._starRef;
|
||||||
|
var idx = allMarkers.indexOf(marker);
|
||||||
|
if (idx !== -1) allMarkers[idx] = cm;
|
||||||
|
if (wasOnMap) cm.addTo(map);
|
||||||
|
}}
|
||||||
|
|
||||||
function rateMarker(marker, action) {{
|
function rateMarker(marker, action) {{
|
||||||
var hashId = marker._data.hashId;
|
var hashId = marker._data.hashId;
|
||||||
var ratings = loadRatings();
|
var ratings = loadRatings();
|
||||||
@@ -795,8 +1019,12 @@ function applyFilters() {{
|
|||||||
if (show) {{
|
if (show) {{
|
||||||
if (!map.hasLayer(m)) m.addTo(map);
|
if (!map.hasLayer(m)) m.addTo(map);
|
||||||
visible++;
|
visible++;
|
||||||
|
// Show strike line if rejected and visible
|
||||||
|
if (m._rejectStrike && !map.hasLayer(m._rejectStrike)) m._rejectStrike.addTo(map);
|
||||||
}} else {{
|
}} else {{
|
||||||
if (map.hasLayer(m)) map.removeLayer(m);
|
if (map.hasLayer(m)) map.removeLayer(m);
|
||||||
|
// Hide strike line when marker hidden
|
||||||
|
if (m._rejectStrike && map.hasLayer(m._rejectStrike)) map.removeLayer(m._rejectStrike);
|
||||||
}}
|
}}
|
||||||
}});
|
}});
|
||||||
|
|
||||||
@@ -814,21 +1042,55 @@ function applyFilters() {{
|
|||||||
// Initialize ratings on load
|
// Initialize ratings on load
|
||||||
restoreRatings();
|
restoreRatings();
|
||||||
|
|
||||||
|
// ── Panel toggle ──────────────────────────────────────────────
|
||||||
|
function togglePanel() {{
|
||||||
|
var panel = document.getElementById('info-panel');
|
||||||
|
var openBtn = document.getElementById('panel-open-btn');
|
||||||
|
var isOpen = !panel.classList.contains('collapsed');
|
||||||
|
if (isOpen) {{
|
||||||
|
panel.classList.add('collapsed');
|
||||||
|
openBtn.classList.remove('hidden');
|
||||||
|
}} else {{
|
||||||
|
panel.classList.remove('collapsed');
|
||||||
|
openBtn.classList.add('hidden');
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
|
||||||
|
// On mobile, start with panel collapsed
|
||||||
|
if (window.innerWidth <= 600) {{
|
||||||
|
document.getElementById('info-panel').classList.add('collapsed');
|
||||||
|
document.getElementById('panel-open-btn').classList.remove('hidden');
|
||||||
|
}}
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>"""
|
</html>"""
|
||||||
|
|
||||||
path = Path(output_path)
|
path = Path(output_path)
|
||||||
path.write_text(html, encoding="utf-8")
|
path.write_text(html, encoding="utf-8")
|
||||||
print(f"\n✓ Mapa uložena: {path.resolve()}")
|
logger.info(f"\n✓ Mapa uložena: {path.resolve()}")
|
||||||
return str(path.resolve())
|
return str(path.resolve())
|
||||||
|
|
||||||
|
|
||||||
# ── Main ─────────────────────────────────────────────────────────────────────
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="Scrape apartments from Sreality.cz")
|
||||||
|
parser.add_argument("--max-pages", type=int, help="Maximum number of pages to scrape")
|
||||||
|
parser.add_argument("--max-properties", type=int, help="Maximum number of properties to fetch details for")
|
||||||
|
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||||
|
help="Logging level (default: INFO)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, args.log_level),
|
||||||
|
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
|
||||||
|
handlers=[logging.StreamHandler()]
|
||||||
|
)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
estates = scrape()
|
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
|
||||||
|
|
||||||
if estates:
|
if estates:
|
||||||
# Save raw data as JSON backup
|
# Save raw data as JSON backup
|
||||||
@@ -837,12 +1099,12 @@ if __name__ == "__main__":
|
|||||||
json.dumps(estates, ensure_ascii=False, indent=2),
|
json.dumps(estates, ensure_ascii=False, indent=2),
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
print(f"✓ Data uložena: {json_path.resolve()}")
|
logger.info(f"✓ Data uložena: {json_path.resolve()}")
|
||||||
|
|
||||||
# Generate map
|
# Generate map
|
||||||
map_path = generate_map(estates)
|
map_path = generate_map(estates)
|
||||||
elapsed = time.time() - start
|
elapsed = time.time() - start
|
||||||
print(f"\n⏱ Celkový čas: {elapsed:.0f} s")
|
logger.info(f"\n⏱ Celkový čas: {elapsed:.0f} s")
|
||||||
print(f"\nOtevři v prohlížeči:\n file://{map_path}")
|
logger.info(f"\nOtevři v prohlížeči:\n file://{map_path}")
|
||||||
else:
|
else:
|
||||||
print("\nŽádné byty neodpovídají kritériím :(")
|
logger.info("\nŽádné byty neodpovídají kritériím :(")
|
||||||
|
|||||||
@@ -6,13 +6,18 @@ Výstup: byty_bezrealitky.json
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
MAX_PRICE = 13_500_000
|
MAX_PRICE = 13_500_000
|
||||||
@@ -69,51 +74,63 @@ def fetch_page(page: int) -> tuple[list[dict], int]:
|
|||||||
Returns (list of advert dicts from Apollo cache, total count).
|
Returns (list of advert dicts from Apollo cache, total count).
|
||||||
"""
|
"""
|
||||||
url = f"{BASE_URL}/vypis/nabidka-prodej/byt/praha?page={page}"
|
url = f"{BASE_URL}/vypis/nabidka-prodej/byt/praha?page={page}"
|
||||||
|
logger.debug(f"HTTP GET request: {url}")
|
||||||
|
logger.debug(f"Headers: {HEADERS}")
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
req = urllib.request.Request(url, headers=HEADERS)
|
||||||
resp = urllib.request.urlopen(req, timeout=30)
|
|
||||||
html = resp.read().decode("utf-8")
|
|
||||||
|
|
||||||
match = re.search(
|
|
||||||
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
|
||||||
html, re.DOTALL
|
|
||||||
)
|
|
||||||
if not match:
|
|
||||||
return [], 0
|
|
||||||
|
|
||||||
data = json.loads(match.group(1))
|
|
||||||
cache = data["props"]["pageProps"]["apolloCache"]
|
|
||||||
|
|
||||||
# Extract adverts from cache
|
|
||||||
adverts = []
|
|
||||||
for key, val in cache.items():
|
|
||||||
if key.startswith("Advert:") and isinstance(val, dict) and val.get("__typename") == "Advert":
|
|
||||||
adverts.append(val)
|
|
||||||
|
|
||||||
# Get total count from ROOT_QUERY
|
|
||||||
total = 0
|
|
||||||
root = cache.get("ROOT_QUERY", {})
|
|
||||||
for key, val in root.items():
|
|
||||||
if "listAdverts" in key and isinstance(val, dict):
|
|
||||||
tc = val.get("totalCount")
|
|
||||||
if tc and tc > total:
|
|
||||||
total = tc
|
|
||||||
|
|
||||||
return adverts, total
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_detail(uri: str) -> dict | None:
|
|
||||||
"""Fetch detail page for a listing."""
|
|
||||||
try:
|
try:
|
||||||
url = f"{BASE_URL}/nemovitosti-byty-domy/{uri}"
|
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
|
||||||
resp = urllib.request.urlopen(req, timeout=30)
|
resp = urllib.request.urlopen(req, timeout=30)
|
||||||
html = resp.read().decode("utf-8")
|
html = resp.read().decode("utf-8")
|
||||||
|
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
|
||||||
|
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
||||||
html, re.DOTALL
|
html, re.DOTALL
|
||||||
)
|
)
|
||||||
if not match:
|
if not match:
|
||||||
|
logger.debug("No __NEXT_DATA__ script found in HTML")
|
||||||
|
return [], 0
|
||||||
|
|
||||||
|
data = json.loads(match.group(1))
|
||||||
|
cache = data["props"]["pageProps"]["apolloCache"]
|
||||||
|
|
||||||
|
# Extract adverts from cache
|
||||||
|
adverts = []
|
||||||
|
for key, val in cache.items():
|
||||||
|
if key.startswith("Advert:") and isinstance(val, dict) and val.get("__typename") == "Advert":
|
||||||
|
adverts.append(val)
|
||||||
|
|
||||||
|
# Get total count from ROOT_QUERY
|
||||||
|
total = 0
|
||||||
|
root = cache.get("ROOT_QUERY", {})
|
||||||
|
for key, val in root.items():
|
||||||
|
if "listAdverts" in key and isinstance(val, dict):
|
||||||
|
tc = val.get("totalCount")
|
||||||
|
if tc and tc > total:
|
||||||
|
total = tc
|
||||||
|
|
||||||
|
logger.debug(f"Page {page}: found {len(adverts)} adverts, total={total}")
|
||||||
|
return adverts, total
|
||||||
|
except (urllib.error.URLError, ConnectionError, OSError) as e:
|
||||||
|
logger.error(f"HTTP request failed for {url}: {e}", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_detail(uri: str) -> dict | None:
|
||||||
|
"""Fetch detail page for a listing."""
|
||||||
|
try:
|
||||||
|
url = f"{BASE_URL}/nemovitosti-byty-domy/{uri}"
|
||||||
|
logger.debug(f"HTTP GET request: {url}")
|
||||||
|
req = urllib.request.Request(url, headers=HEADERS)
|
||||||
|
resp = urllib.request.urlopen(req, timeout=30)
|
||||||
|
html = resp.read().decode("utf-8")
|
||||||
|
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
|
||||||
|
|
||||||
|
match = re.search(
|
||||||
|
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
||||||
|
html, re.DOTALL
|
||||||
|
)
|
||||||
|
if not match:
|
||||||
|
logger.debug("No __NEXT_DATA__ script found in detail page")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
data = json.loads(match.group(1))
|
data = json.loads(match.group(1))
|
||||||
@@ -124,10 +141,11 @@ def fetch_detail(uri: str) -> dict | None:
|
|||||||
if key.startswith("Advert:") and isinstance(val, dict):
|
if key.startswith("Advert:") and isinstance(val, dict):
|
||||||
# Detail pages have much more fields
|
# Detail pages have much more fields
|
||||||
if "construction" in val or "etage" in val or "ownership" in val:
|
if "construction" in val or "etage" in val or "ownership" in val:
|
||||||
|
logger.debug(f"Detail found for {uri}: construction={val.get('construction')}, etage={val.get('etage')}")
|
||||||
return val
|
return val
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" Warning: detail failed for {uri}: {e}")
|
logger.warning(f"Detail failed for {uri}: {e}", exc_info=True)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -152,35 +170,43 @@ def load_cache(json_path: str = "byty_bezrealitky.json") -> dict[int, dict]:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def scrape(max_pages: int | None = None, max_properties: int | None = None):
|
||||||
cache = load_cache()
|
cache = load_cache()
|
||||||
|
|
||||||
print("=" * 60)
|
logger.info("=" * 60)
|
||||||
print("Stahuji inzeráty z Bezrealitky.cz")
|
logger.info("Stahuji inzeráty z Bezrealitky.cz")
|
||||||
print(f"Cena: do {format_price(MAX_PRICE)}")
|
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
|
||||||
print(f"Min. plocha: {MIN_AREA} m²")
|
logger.info(f"Min. plocha: {MIN_AREA} m²")
|
||||||
print(f"Patro: od {MIN_FLOOR}. NP")
|
logger.info(f"Patro: od {MIN_FLOOR}. NP")
|
||||||
print(f"Region: Praha")
|
logger.info(f"Region: Praha")
|
||||||
if cache:
|
if cache:
|
||||||
print(f"Cache: {len(cache)} bytů z minulého běhu")
|
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
|
||||||
print("=" * 60)
|
if max_pages:
|
||||||
|
logger.info(f"Max. stran: {max_pages}")
|
||||||
|
if max_properties:
|
||||||
|
logger.info(f"Max. bytů: {max_properties}")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
# Step 1: Fetch all listing pages
|
# Step 1: Fetch all listing pages
|
||||||
print("\nFáze 1: Stahování seznamu inzerátů...")
|
logger.info("\nFáze 1: Stahování seznamu inzerátů...")
|
||||||
all_adverts = {} # id -> advert dict (dedup)
|
all_adverts = {} # id -> advert dict (dedup)
|
||||||
page = 1
|
page = 1
|
||||||
total = None
|
total = None
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
print(f" Strana {page} ...")
|
if max_pages and page > max_pages:
|
||||||
|
logger.debug(f"Max pages limit reached: {max_pages}")
|
||||||
|
break
|
||||||
|
logger.info(f"Strana {page} ...")
|
||||||
adverts, total_count = fetch_page(page)
|
adverts, total_count = fetch_page(page)
|
||||||
|
|
||||||
if total is None and total_count > 0:
|
if total is None and total_count > 0:
|
||||||
total = total_count
|
total = total_count
|
||||||
total_pages = math.ceil(total / PER_PAGE)
|
total_pages = math.ceil(total / PER_PAGE)
|
||||||
print(f" → Celkem {total} inzerátů, ~{total_pages} stran")
|
logger.info(f"→ Celkem {total} inzerátů, ~{total_pages} stran")
|
||||||
|
|
||||||
if not adverts:
|
if not adverts:
|
||||||
|
logger.debug(f"No adverts found on page {page}, stopping")
|
||||||
break
|
break
|
||||||
|
|
||||||
for adv in adverts:
|
for adv in adverts:
|
||||||
@@ -193,7 +219,7 @@ def scrape():
|
|||||||
break
|
break
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
print(f"\n Staženo: {len(all_adverts)} unikátních inzerátů")
|
logger.info(f"\nStaženo: {len(all_adverts)} unikátních inzerátů")
|
||||||
|
|
||||||
# Step 2: Pre-filter by disposition, price, area from list data
|
# Step 2: Pre-filter by disposition, price, area from list data
|
||||||
pre_filtered = []
|
pre_filtered = []
|
||||||
@@ -203,47 +229,57 @@ def scrape():
|
|||||||
excluded_no_gps = 0
|
excluded_no_gps = 0
|
||||||
|
|
||||||
for adv in all_adverts.values():
|
for adv in all_adverts.values():
|
||||||
|
adv_id = adv.get("id")
|
||||||
disp = adv.get("disposition", "")
|
disp = adv.get("disposition", "")
|
||||||
if disp not in WANTED_DISPOSITIONS:
|
if disp not in WANTED_DISPOSITIONS:
|
||||||
excluded_disp += 1
|
excluded_disp += 1
|
||||||
|
logger.debug(f"Filter: id={adv_id} - excluded (disposition {disp})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
price = adv.get("price", 0) or 0
|
price = adv.get("price", 0) or 0
|
||||||
if price > MAX_PRICE or price == 0:
|
if price > MAX_PRICE or price == 0:
|
||||||
excluded_price += 1
|
excluded_price += 1
|
||||||
|
logger.debug(f"Filter: id={adv_id} - excluded (price {price})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
surface = adv.get("surface")
|
surface = adv.get("surface")
|
||||||
if surface is not None and surface < MIN_AREA:
|
if surface is not None and surface < MIN_AREA:
|
||||||
excluded_area += 1
|
excluded_area += 1
|
||||||
|
logger.debug(f"Filter: id={adv_id} - excluded (area {surface} m²)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
gps = adv.get("gps", {})
|
gps = adv.get("gps", {})
|
||||||
if not gps or not gps.get("lat") or not gps.get("lng"):
|
if not gps or not gps.get("lat") or not gps.get("lng"):
|
||||||
excluded_no_gps += 1
|
excluded_no_gps += 1
|
||||||
|
logger.debug(f"Filter: id={adv_id} - excluded (no GPS)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
pre_filtered.append(adv)
|
pre_filtered.append(adv)
|
||||||
|
|
||||||
print(f"\nPo předfiltraci:")
|
logger.info(f"\nPo předfiltraci:")
|
||||||
print(f" Vyloučeno (dispozice): {excluded_disp}")
|
logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
|
||||||
print(f" Vyloučeno (cena): {excluded_price}")
|
logger.info(f" Vyloučeno (cena): {excluded_price}")
|
||||||
print(f" Vyloučeno (plocha): {excluded_area}")
|
logger.info(f" Vyloučeno (plocha): {excluded_area}")
|
||||||
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
||||||
print(f" Zbývá: {len(pre_filtered)}")
|
logger.info(f" Zbývá: {len(pre_filtered)}")
|
||||||
|
|
||||||
# Step 3: Fetch details
|
# Step 3: Fetch details
|
||||||
print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
|
logger.info(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
|
||||||
results = []
|
results = []
|
||||||
excluded_panel = 0
|
excluded_panel = 0
|
||||||
excluded_floor = 0
|
excluded_floor = 0
|
||||||
excluded_detail = 0
|
excluded_detail = 0
|
||||||
cache_hits = 0
|
cache_hits = 0
|
||||||
|
properties_fetched = 0
|
||||||
|
|
||||||
for i, adv in enumerate(pre_filtered):
|
for i, adv in enumerate(pre_filtered):
|
||||||
|
if max_properties and properties_fetched >= max_properties:
|
||||||
|
logger.debug(f"Max properties limit reached: {max_properties}")
|
||||||
|
break
|
||||||
uri = adv.get("uri", "")
|
uri = adv.get("uri", "")
|
||||||
if not uri:
|
if not uri:
|
||||||
excluded_detail += 1
|
excluded_detail += 1
|
||||||
|
logger.debug(f"Filter: id={adv.get('id')} - excluded (no URI)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check cache — if hash_id exists and price unchanged, reuse
|
# Check cache — if hash_id exists and price unchanged, reuse
|
||||||
@@ -252,6 +288,7 @@ def scrape():
|
|||||||
cached = cache.get(adv_id)
|
cached = cache.get(adv_id)
|
||||||
if cached and cached.get("price") == adv_price:
|
if cached and cached.get("price") == adv_price:
|
||||||
cache_hits += 1
|
cache_hits += 1
|
||||||
|
logger.debug(f"Cache hit for id={adv_id}")
|
||||||
results.append(cached)
|
results.append(cached)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -260,26 +297,30 @@ def scrape():
|
|||||||
|
|
||||||
if not detail:
|
if not detail:
|
||||||
excluded_detail += 1
|
excluded_detail += 1
|
||||||
|
logger.debug(f"Filter: id={adv_id} - excluded (detail fetch failed)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check construction — exclude panel
|
# Check construction — exclude panel
|
||||||
construction = detail.get("construction", "")
|
construction = detail.get("construction", "")
|
||||||
if construction == "PANEL":
|
if construction == "PANEL":
|
||||||
excluded_panel += 1
|
excluded_panel += 1
|
||||||
print(f" ✗ Vyloučen #{adv['id']}: panel")
|
logger.debug(f"Filter: id={adv['id']} - excluded (panel construction)")
|
||||||
|
logger.info(f"✗ Vyloučen #{adv['id']}: panel")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check situation — exclude sídliště
|
# Check situation — exclude sídliště
|
||||||
situation = detail.get("situation", "")
|
situation = detail.get("situation", "")
|
||||||
if situation and "HOUSING_ESTATE" in str(situation).upper():
|
if situation and "HOUSING_ESTATE" in str(situation).upper():
|
||||||
excluded_panel += 1
|
excluded_panel += 1
|
||||||
print(f" ✗ Vyloučen #{adv['id']}: sídliště")
|
logger.debug(f"Filter: id={adv['id']} - excluded (housing estate)")
|
||||||
|
logger.info(f"✗ Vyloučen #{adv['id']}: sídliště")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check floor (etage)
|
# Check floor (etage)
|
||||||
etage = detail.get("etage")
|
etage = detail.get("etage")
|
||||||
if etage is not None and etage < MIN_FLOOR:
|
if etage is not None and etage < MIN_FLOOR:
|
||||||
excluded_floor += 1
|
excluded_floor += 1
|
||||||
|
logger.debug(f"Filter: id={adv_id} - excluded (floor {etage})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
gps = adv.get("gps", {})
|
gps = adv.get("gps", {})
|
||||||
@@ -315,28 +356,46 @@ def scrape():
|
|||||||
"url": f"{BASE_URL}/nemovitosti-byty-domy/{uri}",
|
"url": f"{BASE_URL}/nemovitosti-byty-domy/{uri}",
|
||||||
"source": "bezrealitky",
|
"source": "bezrealitky",
|
||||||
"image": "",
|
"image": "",
|
||||||
|
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
properties_fetched += 1
|
||||||
|
|
||||||
if (i + 1) % 20 == 0:
|
if (i + 1) % 20 == 0:
|
||||||
print(f" Zpracováno {i + 1}/{len(pre_filtered)} ...")
|
logger.info(f"Zpracováno {i + 1}/{len(pre_filtered)} ...")
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
logger.info(f"\n{'=' * 60}")
|
||||||
print(f"Výsledky Bezrealitky:")
|
logger.info(f"Výsledky Bezrealitky:")
|
||||||
print(f" Předfiltrováno: {len(pre_filtered)}")
|
logger.info(f" Předfiltrováno: {len(pre_filtered)}")
|
||||||
print(f" Z cache (přeskočeno): {cache_hits}")
|
logger.info(f" Z cache (přeskočeno): {cache_hits}")
|
||||||
print(f" Vyloučeno (panel/síd): {excluded_panel}")
|
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
|
||||||
print(f" Vyloučeno (patro): {excluded_floor}")
|
logger.info(f" Vyloučeno (patro): {excluded_floor}")
|
||||||
print(f" Vyloučeno (bez detailu): {excluded_detail}")
|
logger.info(f" Vyloučeno (bez detailu): {excluded_detail}")
|
||||||
print(f" ✓ Vyhovující byty: {len(results)}")
|
logger.info(f" ✓ Vyhovující byty: {len(results)}")
|
||||||
print(f"{'=' * 60}")
|
logger.info(f"{'=' * 60}")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="Scrape apartments from Bezrealitky.cz")
|
||||||
|
parser.add_argument("--max-pages", type=int, default=None,
|
||||||
|
help="Maximum number of listing pages to scrape")
|
||||||
|
parser.add_argument("--max-properties", type=int, default=None,
|
||||||
|
help="Maximum number of properties to fetch details for")
|
||||||
|
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||||
|
help="Logging level (default: INFO)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, args.log_level),
|
||||||
|
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
|
||||||
|
handlers=[logging.StreamHandler()]
|
||||||
|
)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
estates = scrape()
|
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
|
||||||
|
|
||||||
if estates:
|
if estates:
|
||||||
json_path = Path("byty_bezrealitky.json")
|
json_path = Path("byty_bezrealitky.json")
|
||||||
@@ -345,7 +404,7 @@ if __name__ == "__main__":
|
|||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
elapsed = time.time() - start
|
elapsed = time.time() - start
|
||||||
print(f"\n✓ Data uložena: {json_path.resolve()}")
|
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
|
||||||
print(f"⏱ Celkový čas: {elapsed:.0f} s")
|
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
|
||||||
else:
|
else:
|
||||||
print("\nŽádné byty z Bezrealitek neodpovídají kritériím :(")
|
logger.info("\nŽádné byty z Bezrealitek neodpovídají kritériím :(")
|
||||||
|
|||||||
@@ -6,12 +6,17 @@ Výstup: byty_cityhome.json
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
MAX_PRICE = 14_000_000
|
MAX_PRICE = 14_000_000
|
||||||
@@ -29,18 +34,26 @@ HEADERS = {
|
|||||||
BASE_URL = "https://www.city-home.cz"
|
BASE_URL = "https://www.city-home.cz"
|
||||||
|
|
||||||
|
|
||||||
def fetch_url(url: str) -> str:
|
def fetch_url(url: str, retries: int = 3) -> str:
|
||||||
"""Fetch URL and return HTML string."""
|
"""Fetch URL and return HTML string. Raises HTTPError on 4xx/5xx."""
|
||||||
for attempt in range(3):
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
|
logger.debug(f"HTTP GET request (attempt {attempt + 1}/{retries}): {url}")
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
req = urllib.request.Request(url, headers=HEADERS)
|
||||||
resp = urllib.request.urlopen(req, timeout=30)
|
resp = urllib.request.urlopen(req, timeout=30)
|
||||||
return resp.read().decode("utf-8")
|
html = resp.read().decode("utf-8")
|
||||||
|
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
|
||||||
|
return html
|
||||||
|
except urllib.error.HTTPError:
|
||||||
|
# Don't retry on HTTP errors (404, 403, etc.) — re-raise immediately
|
||||||
|
raise
|
||||||
except (ConnectionResetError, ConnectionError, urllib.error.URLError) as e:
|
except (ConnectionResetError, ConnectionError, urllib.error.URLError) as e:
|
||||||
if attempt < 2:
|
if attempt < retries - 1:
|
||||||
time.sleep((attempt + 1) * 2)
|
wait = (attempt + 1) * 2
|
||||||
print(f" Retry {attempt + 1}: {e}")
|
logger.warning(f"Connection error (retry {attempt + 1}/{retries} after {wait}s): {e}")
|
||||||
|
time.sleep(wait)
|
||||||
else:
|
else:
|
||||||
|
logger.error(f"HTTP request failed after {retries} attempts: {e}", exc_info=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
@@ -114,31 +127,21 @@ def parse_filter_page(html: str) -> list[dict]:
|
|||||||
if detail_url and not detail_url.startswith("http"):
|
if detail_url and not detail_url.startswith("http"):
|
||||||
detail_url = BASE_URL + detail_url
|
detail_url = BASE_URL + detail_url
|
||||||
|
|
||||||
# Extract floor from cells — look for pattern like "3.NP" or "2.PP"
|
# Parse table cells: [unit_name, unit_type_label, address, floor, disposition, area, transaction, price]
|
||||||
cells = re.findall(r'<td[^>]*>(.*?)</td>', row_content, re.DOTALL)
|
cells = re.findall(r'<td[^>]*>(.*?)</td>', row_content, re.DOTALL)
|
||||||
floor = None
|
cell_texts = [re.sub(r'<[^>]+>', '', c).strip() for c in cells]
|
||||||
floor_text = ""
|
|
||||||
project_name = ""
|
|
||||||
|
|
||||||
for cell in cells:
|
# Cell[2] = address (e.g. "Žateckých 14"), cell[3] = floor (e.g. "3.NP")
|
||||||
cell_text = re.sub(r'<[^>]+>', '', cell).strip()
|
project_address = cell_texts[2] if len(cell_texts) > 2 else ""
|
||||||
# Floor pattern
|
|
||||||
np_match = re.search(r'(\d+)\.\s*NP', cell_text)
|
floor = None
|
||||||
pp_match = re.search(r'(\d+)\.\s*PP', cell_text)
|
if len(cell_texts) > 3:
|
||||||
|
np_match = re.search(r'(\d+)\.\s*NP', cell_texts[3])
|
||||||
|
pp_match = re.search(r'(\d+)\.\s*PP', cell_texts[3])
|
||||||
if np_match:
|
if np_match:
|
||||||
floor = int(np_match.group(1))
|
floor = int(np_match.group(1))
|
||||||
floor_text = cell_text
|
|
||||||
elif pp_match:
|
elif pp_match:
|
||||||
floor = -int(pp_match.group(1)) # Underground
|
floor = -int(pp_match.group(1))
|
||||||
floor_text = cell_text
|
|
||||||
|
|
||||||
# Extract project name — usually in a cell that's not a number/price/floor
|
|
||||||
for cell in cells:
|
|
||||||
cell_text = re.sub(r'<[^>]+>', '', cell).strip()
|
|
||||||
if cell_text and not re.match(r'^[\d\s.,]+$', cell_text) and "NP" not in cell_text and "PP" not in cell_text and "m²" not in cell_text and "Kč" not in cell_text and "EUR" not in cell_text and "CZK" not in cell_text:
|
|
||||||
if len(cell_text) > 3 and cell_text != unit_name:
|
|
||||||
project_name = cell_text
|
|
||||||
break
|
|
||||||
|
|
||||||
listing = {
|
listing = {
|
||||||
"price": int(cena.group(1)),
|
"price": int(cena.group(1)),
|
||||||
@@ -148,45 +151,75 @@ def parse_filter_page(html: str) -> list[dict]:
|
|||||||
"project_id": project.group(1) if project else "",
|
"project_id": project.group(1) if project else "",
|
||||||
"transaction": transaction.group(1) if transaction else "",
|
"transaction": transaction.group(1) if transaction else "",
|
||||||
"disposition": dispozition.group(1) if dispozition else "",
|
"disposition": dispozition.group(1) if dispozition else "",
|
||||||
"location": location.group(1) if location else "",
|
|
||||||
"url": detail_url,
|
"url": detail_url,
|
||||||
"unit_name": unit_name,
|
"unit_name": unit_name,
|
||||||
"floor": floor,
|
"floor": floor,
|
||||||
"project_name": project_name,
|
"project_address": project_address,
|
||||||
}
|
}
|
||||||
listings.append(listing)
|
listings.append(listing)
|
||||||
|
|
||||||
return listings
|
return listings
|
||||||
|
|
||||||
|
|
||||||
def extract_project_gps(html: str) -> dict[str, tuple[float, float]]:
|
def get_lokalita_urls(slug: str) -> list[str]:
|
||||||
"""Extract GPS coordinates for projects from locality pages."""
|
"""Return candidate lokalita URLs to try in order."""
|
||||||
# Pattern in JS: ['<h4>Project Name</h4>...', 'LAT', 'LON', '1', 'Name']
|
return [
|
||||||
gps_data = {}
|
f"{BASE_URL}/projekty/{slug}/lokalita",
|
||||||
for match in re.finditer(r"\['[^']*<h4>([^<]+)</h4>[^']*',\s*'([\d.]+)',\s*'([\d.]+)'", html):
|
f"{BASE_URL}/bytove-domy/{slug}/lokalita",
|
||||||
name = match.group(1).strip()
|
f"{BASE_URL}/bytove-domy/{slug}/lokalita1",
|
||||||
lat = float(match.group(2))
|
]
|
||||||
lon = float(match.group(3))
|
|
||||||
gps_data[name] = (lat, lon)
|
|
||||||
return gps_data
|
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def extract_project_gps(html: str) -> tuple[float, float] | None:
|
||||||
print("=" * 60)
|
"""Extract project GPS from lokalita page JS variable.
|
||||||
print("Stahuji inzeráty z CityHome (city-home.cz)")
|
|
||||||
print(f"Cena: do {format_price(MAX_PRICE)}")
|
The page contains: var locations = [['<h4>Name</h4>...', 'LAT', 'LNG', 'CATEGORY', 'Label'], ...]
|
||||||
print(f"Min. plocha: {MIN_AREA} m²")
|
Category '1' = the project's own marker. Some projects have two cat-1 entries (data error);
|
||||||
print(f"Patro: od {MIN_FLOOR}. NP")
|
in that case we pick the one whose name contains a digit and is not a transit landmark.
|
||||||
print("=" * 60)
|
"""
|
||||||
|
block = re.search(r'var locations\s*=\s*\[(.*?)\];', html, re.DOTALL)
|
||||||
|
if not block:
|
||||||
|
return None
|
||||||
|
|
||||||
|
entries = re.findall(
|
||||||
|
r"'<h4>(.*?)</h4>.*?',\s*'([\d.]+)',\s*'([\d.]+)',\s*'1'",
|
||||||
|
block.group(0),
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
if not entries:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if len(entries) == 1:
|
||||||
|
return float(entries[0][1]), float(entries[0][2])
|
||||||
|
|
||||||
|
# Multiple cat-1 entries: pick the real project marker
|
||||||
|
transit_re = re.compile(r'nádraží|park|metro|tramvaj|autobus|zastávka', re.IGNORECASE)
|
||||||
|
for name, lat, lng in entries:
|
||||||
|
if re.search(r'\d', name) and not transit_re.search(name):
|
||||||
|
return float(lat), float(lng)
|
||||||
|
|
||||||
|
# Fallback: first entry
|
||||||
|
return float(entries[0][1]), float(entries[0][2])
|
||||||
|
|
||||||
|
|
||||||
|
def scrape(max_pages: int | None = None, max_properties: int | None = None):
|
||||||
|
logger.info("=" * 60)
|
||||||
|
logger.info("Stahuji inzeráty z CityHome (city-home.cz)")
|
||||||
|
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
|
||||||
|
logger.info(f"Min. plocha: {MIN_AREA} m²")
|
||||||
|
logger.info(f"Patro: od {MIN_FLOOR}. NP")
|
||||||
|
if max_properties:
|
||||||
|
logger.info(f"Max. bytů: {max_properties}")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
# Step 1: Fetch the main filter page
|
# Step 1: Fetch the main filter page
|
||||||
print("\nFáze 1: Stahování seznamu bytů...")
|
logger.info("\nFáze 1: Stahování seznamu bytů...")
|
||||||
html = fetch_url(f"{BASE_URL}/filtr-nemovitosti1")
|
html = fetch_url(f"{BASE_URL}/filtr-nemovitosti1")
|
||||||
all_listings = parse_filter_page(html)
|
all_listings = parse_filter_page(html)
|
||||||
print(f" Nalezeno: {len(all_listings)} jednotek")
|
logger.info(f"Nalezeno: {len(all_listings)} jednotek")
|
||||||
|
|
||||||
# Step 2: Collect unique project slugs from detail URLs to fetch GPS
|
# Step 2: Collect unique project slugs from detail URLs to fetch GPS
|
||||||
print("\nFáze 2: Stahování GPS souřadnic projektů...")
|
logger.info("\nFáze 2: Stahování GPS souřadnic projektů...")
|
||||||
project_slugs = set()
|
project_slugs = set()
|
||||||
for listing in all_listings:
|
for listing in all_listings:
|
||||||
url = listing.get("url", "")
|
url = listing.get("url", "")
|
||||||
@@ -198,23 +231,27 @@ def scrape():
|
|||||||
# Fetch GPS for each project from locality pages
|
# Fetch GPS for each project from locality pages
|
||||||
project_gps = {}
|
project_gps = {}
|
||||||
for slug in sorted(project_slugs):
|
for slug in sorted(project_slugs):
|
||||||
time.sleep(0.5)
|
time.sleep(0.3)
|
||||||
try:
|
gps = None
|
||||||
locality_url = f"{BASE_URL}/projekty/{slug}/lokalita"
|
for url in get_lokalita_urls(slug):
|
||||||
loc_html = fetch_url(locality_url)
|
try:
|
||||||
gps = extract_project_gps(loc_html)
|
logger.debug(f"Fetching project GPS: {url}")
|
||||||
if gps:
|
loc_html = fetch_url(url)
|
||||||
# Take first entry (the project itself)
|
gps = extract_project_gps(loc_html)
|
||||||
first_name, (lat, lon) = next(iter(gps.items()))
|
if gps:
|
||||||
project_gps[slug] = (lat, lon)
|
break
|
||||||
print(f" ✓ {slug}: {lat}, {lon}")
|
except Exception as e:
|
||||||
else:
|
logger.debug(f"GPS fetch failed for {url}: {e}")
|
||||||
print(f" ✗ {slug}: GPS nenalezeno")
|
continue
|
||||||
except Exception as e:
|
|
||||||
print(f" ✗ {slug}: chyba ({e})")
|
if gps:
|
||||||
|
project_gps[slug] = gps
|
||||||
|
logger.info(f"✓ {slug}: {gps[0]}, {gps[1]}")
|
||||||
|
else:
|
||||||
|
logger.info(f"✗ {slug}: GPS nenalezeno")
|
||||||
|
|
||||||
# Step 3: Filter listings
|
# Step 3: Filter listings
|
||||||
print(f"\nFáze 3: Filtrování...")
|
logger.info(f"\nFáze 3: Filtrování...")
|
||||||
results = []
|
results = []
|
||||||
excluded_sold = 0
|
excluded_sold = 0
|
||||||
excluded_type = 0
|
excluded_type = 0
|
||||||
@@ -223,45 +260,57 @@ def scrape():
|
|||||||
excluded_area = 0
|
excluded_area = 0
|
||||||
excluded_floor = 0
|
excluded_floor = 0
|
||||||
excluded_no_gps = 0
|
excluded_no_gps = 0
|
||||||
|
properties_fetched = 0
|
||||||
|
|
||||||
for listing in all_listings:
|
for listing in all_listings:
|
||||||
|
if max_properties and properties_fetched >= max_properties:
|
||||||
|
logger.debug(f"Max properties limit reached: {max_properties}")
|
||||||
|
break
|
||||||
|
unit_name = listing.get("unit_name", "unknown")
|
||||||
# Only available units
|
# Only available units
|
||||||
if listing["free"] != "yes":
|
if listing["free"] != "yes":
|
||||||
excluded_sold += 1
|
excluded_sold += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (not free)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Only apartments (unittype=2)
|
# Only apartments (unittype=2)
|
||||||
if listing["unittype"] != 2:
|
if listing["unittype"] != 2:
|
||||||
excluded_type += 1
|
excluded_type += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (not apartment, unittype={listing['unittype']})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Only sales
|
# Only sales
|
||||||
if listing["transaction"] != "prodej":
|
if listing["transaction"] != "prodej":
|
||||||
excluded_type += 1
|
excluded_type += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (not sale, transaction={listing['transaction']})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Disposition
|
# Disposition
|
||||||
disp = listing["disposition"]
|
disp = listing["disposition"]
|
||||||
if disp not in WANTED_DISPOSITIONS:
|
if disp not in WANTED_DISPOSITIONS:
|
||||||
excluded_disp += 1
|
excluded_disp += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (disposition {disp})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Price
|
# Price
|
||||||
price = listing["price"]
|
price = listing["price"]
|
||||||
if price <= 0 or price > MAX_PRICE:
|
if price <= 0 or price > MAX_PRICE:
|
||||||
excluded_price += 1
|
excluded_price += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (price {price})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Area
|
# Area
|
||||||
area = listing["area"]
|
area = listing["area"]
|
||||||
if area < MIN_AREA:
|
if area < MIN_AREA:
|
||||||
excluded_area += 1
|
excluded_area += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (area {area} m²)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Floor
|
# Floor
|
||||||
floor = listing["floor"]
|
floor = listing["floor"]
|
||||||
if floor is not None and floor < MIN_FLOOR:
|
if floor is not None and floor < MIN_FLOOR:
|
||||||
excluded_floor += 1
|
excluded_floor += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (floor {floor})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# GPS from project
|
# GPS from project
|
||||||
@@ -272,48 +321,81 @@ def scrape():
|
|||||||
|
|
||||||
if not gps:
|
if not gps:
|
||||||
excluded_no_gps += 1
|
excluded_no_gps += 1
|
||||||
|
logger.debug(f"Filter: {unit_name} - excluded (no GPS for project {slug})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
lat, lon = gps
|
lat, lon = gps
|
||||||
|
|
||||||
|
# locality: use project address from cell (e.g. "Žateckých 14") + city from GPS lookup
|
||||||
|
project_address = listing.get("project_address", "")
|
||||||
|
# derive city from slug (GPS lookup key)
|
||||||
|
city_map = {
|
||||||
|
"karlinske-namesti-5": "Praha 8",
|
||||||
|
"melnicka-12": "Praha 7",
|
||||||
|
"na-vaclavce-34": "Praha 5",
|
||||||
|
"nad-kajetankou-12": "Praha 6",
|
||||||
|
"vosmikovych-3": "Praha 9",
|
||||||
|
"zateckych-14": "Praha 2",
|
||||||
|
}
|
||||||
|
city_str = city_map.get(slug, "Praha")
|
||||||
|
locality_str = f"{project_address}, {city_str}" if project_address else city_str
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"hash_id": f"cityhome_{slug}_{listing['unit_name']}",
|
"hash_id": f"cityhome_{slug}_{listing['unit_name']}",
|
||||||
"name": f"Prodej bytu {disp} {area} m² — {listing['project_name']}",
|
"name": f"Prodej bytu {disp}, {int(area)} m² — {project_address}",
|
||||||
"price": price,
|
"price": price,
|
||||||
"price_formatted": format_price(price),
|
"price_formatted": format_price(price),
|
||||||
"locality": f"{listing['project_name']}, Praha",
|
"locality": locality_str,
|
||||||
"lat": lat,
|
"lat": lat,
|
||||||
"lon": lon,
|
"lon": lon,
|
||||||
"disposition": disp,
|
"disposition": disp,
|
||||||
"floor": floor,
|
"floor": floor,
|
||||||
"area": area,
|
"area": float(area),
|
||||||
"building_type": "Cihlová", # CityHome renovuje cihlové domy
|
"building_type": "Cihlová", # CityHome renovuje cihlové domy
|
||||||
"ownership": "neuvedeno",
|
"ownership": "neuvedeno",
|
||||||
"url": url,
|
"url": url,
|
||||||
"source": "cityhome",
|
"source": "cityhome",
|
||||||
"image": "",
|
"image": "",
|
||||||
|
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
properties_fetched += 1
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
logger.info(f"\n{'=' * 60}")
|
||||||
print(f"Výsledky CityHome:")
|
logger.info(f"Výsledky CityHome:")
|
||||||
print(f" Celkem jednotek: {len(all_listings)}")
|
logger.info(f" Celkem jednotek: {len(all_listings)}")
|
||||||
print(f" Vyloučeno (prodáno): {excluded_sold}")
|
logger.info(f" Vyloučeno (prodáno): {excluded_sold}")
|
||||||
print(f" Vyloučeno (typ): {excluded_type}")
|
logger.info(f" Vyloučeno (typ): {excluded_type}")
|
||||||
print(f" Vyloučeno (dispozice): {excluded_disp}")
|
logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
|
||||||
print(f" Vyloučeno (cena): {excluded_price}")
|
logger.info(f" Vyloučeno (cena): {excluded_price}")
|
||||||
print(f" Vyloučeno (plocha): {excluded_area}")
|
logger.info(f" Vyloučeno (plocha): {excluded_area}")
|
||||||
print(f" Vyloučeno (patro): {excluded_floor}")
|
logger.info(f" Vyloučeno (patro): {excluded_floor}")
|
||||||
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
||||||
print(f" ✓ Vyhovující byty: {len(results)}")
|
logger.info(f" ✓ Vyhovující byty: {len(results)}")
|
||||||
print(f"{'=' * 60}")
|
logger.info(f"{'=' * 60}")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="Scrape apartments from CityHome")
|
||||||
|
parser.add_argument("--max-pages", type=int, default=None,
|
||||||
|
help="Maximum number of listing pages to scrape (not applicable for CityHome)")
|
||||||
|
parser.add_argument("--max-properties", type=int, default=None,
|
||||||
|
help="Maximum number of properties to include in results")
|
||||||
|
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||||
|
help="Logging level (default: INFO)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, args.log_level),
|
||||||
|
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
|
||||||
|
handlers=[logging.StreamHandler()]
|
||||||
|
)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
estates = scrape()
|
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
|
||||||
|
|
||||||
if estates:
|
if estates:
|
||||||
json_path = Path("byty_cityhome.json")
|
json_path = Path("byty_cityhome.json")
|
||||||
@@ -322,7 +404,7 @@ if __name__ == "__main__":
|
|||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
elapsed = time.time() - start
|
elapsed = time.time() - start
|
||||||
print(f"\n✓ Data uložena: {json_path.resolve()}")
|
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
|
||||||
print(f"⏱ Celkový čas: {elapsed:.0f} s")
|
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
|
||||||
else:
|
else:
|
||||||
print("\nŽádné byty z CityHome neodpovídají kritériím :(")
|
logger.info("\nŽádné byty z CityHome neodpovídají kritériím :(")
|
||||||
|
|||||||
126
scrape_idnes.py
126
scrape_idnes.py
@@ -6,7 +6,10 @@ Výstup: byty_idnes.json
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
@@ -15,6 +18,8 @@ import urllib.parse
|
|||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
MAX_PRICE = 13_500_000
|
MAX_PRICE = 13_500_000
|
||||||
@@ -51,17 +56,21 @@ def fetch_url(url: str) -> str:
|
|||||||
"""Fetch URL and return HTML string with retry logic."""
|
"""Fetch URL and return HTML string with retry logic."""
|
||||||
for attempt in range(MAX_RETRIES):
|
for attempt in range(MAX_RETRIES):
|
||||||
try:
|
try:
|
||||||
|
logger.debug(f"HTTP GET request (attempt {attempt + 1}/{MAX_RETRIES}): {url}")
|
||||||
|
logger.debug(f"Headers: {HEADERS}")
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
req = urllib.request.Request(url, headers=HEADERS)
|
||||||
resp = urllib.request.urlopen(req, timeout=30)
|
resp = urllib.request.urlopen(req, timeout=30)
|
||||||
data = resp.read()
|
data = resp.read()
|
||||||
|
logger.debug(f"HTTP response: status={resp.status}, size={len(data)} bytes")
|
||||||
return data.decode("utf-8")
|
return data.decode("utf-8")
|
||||||
except (ConnectionResetError, ConnectionError, urllib.error.URLError,
|
except (ConnectionResetError, ConnectionError, urllib.error.URLError,
|
||||||
OSError) as e:
|
OSError) as e:
|
||||||
if attempt < MAX_RETRIES - 1:
|
if attempt < MAX_RETRIES - 1:
|
||||||
wait = (attempt + 1) * 3 # 3, 6, 9, 12s
|
wait = (attempt + 1) * 3 # 3, 6, 9, 12s
|
||||||
print(f" Retry {attempt + 1}/{MAX_RETRIES} (wait {wait}s): {e}")
|
logger.warning(f"Connection error (retry {attempt + 1}/{MAX_RETRIES} after {wait}s): {e}")
|
||||||
time.sleep(wait)
|
time.sleep(wait)
|
||||||
else:
|
else:
|
||||||
|
logger.error(f"HTTP request failed after {MAX_RETRIES} attempts: {e}", exc_info=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
@@ -269,38 +278,47 @@ def load_cache(json_path: str = "byty_idnes.json") -> dict[str, dict]:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def scrape(max_pages: int | None = None, max_properties: int | None = None):
|
||||||
cache = load_cache()
|
cache = load_cache()
|
||||||
|
|
||||||
print("=" * 60)
|
logger.info("=" * 60)
|
||||||
print("Stahuji inzeráty z Reality iDNES")
|
logger.info("Stahuji inzeráty z Reality iDNES")
|
||||||
print(f"Cena: do {format_price(MAX_PRICE)}")
|
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
|
||||||
print(f"Min. plocha: {MIN_AREA} m²")
|
logger.info(f"Min. plocha: {MIN_AREA} m²")
|
||||||
print(f"Patro: od {MIN_FLOOR}. NP")
|
logger.info(f"Patro: od {MIN_FLOOR}. NP")
|
||||||
print(f"Region: Praha")
|
logger.info(f"Region: Praha")
|
||||||
if cache:
|
if cache:
|
||||||
print(f"Cache: {len(cache)} bytů z minulého běhu")
|
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
|
||||||
print("=" * 60)
|
if max_pages:
|
||||||
|
logger.info(f"Max. stran: {max_pages}")
|
||||||
|
if max_properties:
|
||||||
|
logger.info(f"Max. bytů: {max_properties}")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
# Step 1: Fetch listing pages
|
# Step 1: Fetch listing pages
|
||||||
print("\nFáze 1: Stahování seznamu inzerátů...")
|
logger.info("\nFáze 1: Stahování seznamu inzerátů...")
|
||||||
all_listings = {} # id -> listing dict
|
all_listings = {} # id -> listing dict
|
||||||
page = 0
|
page = 0
|
||||||
total = None
|
total = None
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
if max_pages and page >= max_pages:
|
||||||
|
logger.debug(f"Max pages limit reached: {max_pages}")
|
||||||
|
break
|
||||||
url = build_list_url(page)
|
url = build_list_url(page)
|
||||||
print(f" Strana {page + 1} ...")
|
logger.info(f"Strana {page + 1} ...")
|
||||||
html = fetch_url(url)
|
html = fetch_url(url)
|
||||||
|
|
||||||
if total is None:
|
if total is None:
|
||||||
total = parse_total_count(html)
|
total = parse_total_count(html)
|
||||||
total_pages = math.ceil(total / PER_PAGE) if total > 0 else 1
|
total_pages = math.ceil(total / PER_PAGE) if total > 0 else 1
|
||||||
print(f" → Celkem {total} inzerátů, ~{total_pages} stran")
|
logger.info(f"→ Celkem {total} inzerátů, ~{total_pages} stran")
|
||||||
|
|
||||||
listings = parse_listings(html)
|
listings = parse_listings(html)
|
||||||
|
logger.debug(f"Page {page}: found {len(listings)} listings")
|
||||||
|
|
||||||
if not listings:
|
if not listings:
|
||||||
|
logger.debug(f"No listings found on page {page}, stopping")
|
||||||
break
|
break
|
||||||
|
|
||||||
for item in listings:
|
for item in listings:
|
||||||
@@ -313,7 +331,7 @@ def scrape():
|
|||||||
break
|
break
|
||||||
time.sleep(1.0)
|
time.sleep(1.0)
|
||||||
|
|
||||||
print(f"\n Staženo: {len(all_listings)} unikátních inzerátů")
|
logger.info(f"\nStaženo: {len(all_listings)} unikátních inzerátů")
|
||||||
|
|
||||||
# Step 2: Pre-filter by price and area from list data
|
# Step 2: Pre-filter by price and area from list data
|
||||||
pre_filtered = []
|
pre_filtered = []
|
||||||
@@ -322,40 +340,49 @@ def scrape():
|
|||||||
excluded_disp = 0
|
excluded_disp = 0
|
||||||
|
|
||||||
for item in all_listings.values():
|
for item in all_listings.values():
|
||||||
|
item_id = item["id"]
|
||||||
if item["price"] <= 0 or item["price"] > MAX_PRICE:
|
if item["price"] <= 0 or item["price"] > MAX_PRICE:
|
||||||
excluded_price += 1
|
excluded_price += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (price {item['price']})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if item["area"] is not None and item["area"] < MIN_AREA:
|
if item["area"] is not None and item["area"] < MIN_AREA:
|
||||||
excluded_area += 1
|
excluded_area += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (area {item['area']} m²)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if item["disposition"] == "?":
|
if item["disposition"] == "?":
|
||||||
excluded_disp += 1
|
excluded_disp += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (unknown disposition)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
pre_filtered.append(item)
|
pre_filtered.append(item)
|
||||||
|
|
||||||
print(f"\nPo předfiltraci:")
|
logger.info(f"\nPo předfiltraci:")
|
||||||
print(f" Vyloučeno (cena): {excluded_price}")
|
logger.info(f" Vyloučeno (cena): {excluded_price}")
|
||||||
print(f" Vyloučeno (plocha): {excluded_area}")
|
logger.info(f" Vyloučeno (plocha): {excluded_area}")
|
||||||
print(f" Vyloučeno (dispozice): {excluded_disp}")
|
logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
|
||||||
print(f" Zbývá: {len(pre_filtered)}")
|
logger.info(f" Zbývá: {len(pre_filtered)}")
|
||||||
|
|
||||||
# Step 3: Fetch details for GPS, floor, construction
|
# Step 3: Fetch details for GPS, floor, construction
|
||||||
print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
|
logger.info(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
|
||||||
results = []
|
results = []
|
||||||
excluded_panel = 0
|
excluded_panel = 0
|
||||||
excluded_floor = 0
|
excluded_floor = 0
|
||||||
excluded_no_gps = 0
|
excluded_no_gps = 0
|
||||||
excluded_detail = 0
|
excluded_detail = 0
|
||||||
cache_hits = 0
|
cache_hits = 0
|
||||||
|
properties_fetched = 0
|
||||||
|
|
||||||
for i, item in enumerate(pre_filtered):
|
for i, item in enumerate(pre_filtered):
|
||||||
|
if max_properties and properties_fetched >= max_properties:
|
||||||
|
logger.debug(f"Max properties limit reached: {max_properties}")
|
||||||
|
break
|
||||||
# Check cache — if hash_id exists and price unchanged, reuse
|
# Check cache — if hash_id exists and price unchanged, reuse
|
||||||
cached = cache.get(str(item["id"]))
|
cached = cache.get(str(item["id"]))
|
||||||
if cached and cached.get("price") == item["price"]:
|
if cached and cached.get("price") == item["price"]:
|
||||||
cache_hits += 1
|
cache_hits += 1
|
||||||
|
logger.debug(f"Cache hit for id={item['id']}")
|
||||||
results.append(cached)
|
results.append(cached)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -365,34 +392,39 @@ def scrape():
|
|||||||
try:
|
try:
|
||||||
html = fetch_url(url)
|
html = fetch_url(url)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" Warning: detail failed for {item['id']}: {e}")
|
|
||||||
excluded_detail += 1
|
excluded_detail += 1
|
||||||
|
logger.warning(f"Detail failed for id={item['id']}: {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
detail = parse_detail(html)
|
detail = parse_detail(html)
|
||||||
|
logger.debug(f"Detail parsed for id={item['id']}: lat={detail.get('lat')}, lon={detail.get('lon')}, floor={detail.get('floor')}")
|
||||||
|
|
||||||
# Must have GPS
|
# Must have GPS
|
||||||
if not detail.get("lat") or not detail.get("lon"):
|
if not detail.get("lat") or not detail.get("lon"):
|
||||||
excluded_no_gps += 1
|
excluded_no_gps += 1
|
||||||
|
logger.debug(f"Filter: id={item['id']} - excluded (no GPS)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check construction — exclude panel
|
# Check construction — exclude panel
|
||||||
construction = detail.get("construction", "")
|
construction = detail.get("construction", "")
|
||||||
if "panel" in construction:
|
if "panel" in construction:
|
||||||
excluded_panel += 1
|
excluded_panel += 1
|
||||||
print(f" ✗ Vyloučen {item['id'][:12]}...: panel ({construction})")
|
logger.debug(f"Filter: id={item['id']} - excluded (panel construction)")
|
||||||
|
logger.info(f"✗ Vyloučen {item['id'][:12]}...: panel ({construction})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check for sídliště in construction/description
|
# Check for sídliště in construction/description
|
||||||
if "sídliště" in construction or "sidliste" in construction:
|
if "sídliště" in construction or "sidliste" in construction:
|
||||||
excluded_panel += 1
|
excluded_panel += 1
|
||||||
print(f" ✗ Vyloučen {item['id'][:12]}...: sídliště")
|
logger.debug(f"Filter: id={item['id']} - excluded (housing estate)")
|
||||||
|
logger.info(f"✗ Vyloučen {item['id'][:12]}...: sídliště")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check floor
|
# Check floor
|
||||||
floor = detail.get("floor")
|
floor = detail.get("floor")
|
||||||
if floor is not None and floor < MIN_FLOOR:
|
if floor is not None and floor < MIN_FLOOR:
|
||||||
excluded_floor += 1
|
excluded_floor += 1
|
||||||
|
logger.debug(f"Filter: id={item['id']} - excluded (floor {floor})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Map construction to Czech label
|
# Map construction to Czech label
|
||||||
@@ -427,29 +459,47 @@ def scrape():
|
|||||||
"url": item["url"],
|
"url": item["url"],
|
||||||
"source": "idnes",
|
"source": "idnes",
|
||||||
"image": "",
|
"image": "",
|
||||||
|
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
properties_fetched += 1
|
||||||
|
|
||||||
if (i + 1) % 20 == 0:
|
if (i + 1) % 20 == 0:
|
||||||
print(f" Zpracováno {i + 1}/{len(pre_filtered)} ...")
|
logger.info(f"Zpracováno {i + 1}/{len(pre_filtered)} ...")
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
logger.info(f"\n{'=' * 60}")
|
||||||
print(f"Výsledky Reality iDNES:")
|
logger.info(f"Výsledky Reality iDNES:")
|
||||||
print(f" Předfiltrováno: {len(pre_filtered)}")
|
logger.info(f" Předfiltrováno: {len(pre_filtered)}")
|
||||||
print(f" Z cache (přeskočeno): {cache_hits}")
|
logger.info(f" Z cache (přeskočeno): {cache_hits}")
|
||||||
print(f" Vyloučeno (panel/síd): {excluded_panel}")
|
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
|
||||||
print(f" Vyloučeno (patro): {excluded_floor}")
|
logger.info(f" Vyloučeno (patro): {excluded_floor}")
|
||||||
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
||||||
print(f" Vyloučeno (bez detailu): {excluded_detail}")
|
logger.info(f" Vyloučeno (bez detailu): {excluded_detail}")
|
||||||
print(f" ✓ Vyhovující byty: {len(results)}")
|
logger.info(f" ✓ Vyhovující byty: {len(results)}")
|
||||||
print(f"{'=' * 60}")
|
logger.info(f"{'=' * 60}")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="Scrape apartments from Reality iDNES")
|
||||||
|
parser.add_argument("--max-pages", type=int, default=None,
|
||||||
|
help="Maximum number of listing pages to scrape")
|
||||||
|
parser.add_argument("--max-properties", type=int, default=None,
|
||||||
|
help="Maximum number of properties to fetch details for")
|
||||||
|
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||||
|
help="Logging level (default: INFO)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, args.log_level),
|
||||||
|
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
|
||||||
|
handlers=[logging.StreamHandler()]
|
||||||
|
)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
estates = scrape()
|
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
|
||||||
|
|
||||||
if estates:
|
if estates:
|
||||||
json_path = Path("byty_idnes.json")
|
json_path = Path("byty_idnes.json")
|
||||||
@@ -458,7 +508,7 @@ if __name__ == "__main__":
|
|||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
elapsed = time.time() - start
|
elapsed = time.time() - start
|
||||||
print(f"\n✓ Data uložena: {json_path.resolve()}")
|
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
|
||||||
print(f"⏱ Celkový čas: {elapsed:.0f} s")
|
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
|
||||||
else:
|
else:
|
||||||
print("\nŽádné byty z Reality iDNES neodpovídají kritériím :(")
|
logger.info("\nŽádné byty z Reality iDNES neodpovídají kritériím :(")
|
||||||
|
|||||||
363
scrape_psn.py
363
scrape_psn.py
@@ -1,16 +1,22 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
PSN.cz scraper.
|
PSN.cz scraper.
|
||||||
Stáhne byty na prodej v Praze z projektů PSN a vyfiltruje podle kritérií.
|
Stáhne byty na prodej z API /api/units-list — jeden požadavek, žádné stránkování.
|
||||||
Výstup: byty_psn.json
|
Výstup: byty_psn.json
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
# ── Konfigurace ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -18,78 +24,37 @@ MAX_PRICE = 14_000_000
|
|||||||
MIN_AREA = 69
|
MIN_AREA = 69
|
||||||
MIN_FLOOR = 2
|
MIN_FLOOR = 2
|
||||||
|
|
||||||
WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1"}
|
WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1", "5+kk a větší"}
|
||||||
|
|
||||||
|
# Pouze Praha — ostatní města (Brno, Pardubice, Špindlerův Mlýn) přeskočit
|
||||||
|
WANTED_CITIES = {"Praha"}
|
||||||
|
|
||||||
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
|
||||||
BASE_URL = "https://psn.cz"
|
BASE_URL = "https://psn.cz"
|
||||||
|
UNITS_API = f"{BASE_URL}/api/units-list"
|
||||||
# Known Prague project slugs with GPS (from research)
|
|
||||||
PRAGUE_PROJECTS = [
|
|
||||||
{"slug": "zit-branik", "name": "Žít Braník", "lat": 50.0353, "lon": 14.4125},
|
|
||||||
{"slug": "rostislavova-4", "name": "Rostislavova 4", "lat": 50.0620, "lon": 14.4463},
|
|
||||||
{"slug": "pod-drinopolem", "name": "Pod Drinopolem", "lat": 50.0851, "lon": 14.3720},
|
|
||||||
{"slug": "skyline-chodov", "name": "Skyline Chodov", "lat": 50.0418, "lon": 14.4990},
|
|
||||||
{"slug": "jitro", "name": "Jitro", "lat": 50.0729, "lon": 14.4768},
|
|
||||||
{"slug": "maroldka", "name": "Maroldka", "lat": 50.0614, "lon": 14.4517},
|
|
||||||
{"slug": "belehradska-29", "name": "Bělehradská 29", "lat": 50.0682, "lon": 14.4348},
|
|
||||||
{"slug": "jeseniova-93", "name": "Jeseniova 93", "lat": 50.0887, "lon": 14.4692},
|
|
||||||
{"slug": "vanguard", "name": "Vanguard", "lat": 50.0164, "lon": 14.4036},
|
|
||||||
{"slug": "vinohradska-160", "name": "Vinohradská 160", "lat": 50.0780, "lon": 14.4653},
|
|
||||||
{"slug": "hermanova24", "name": "Heřmanova 24", "lat": 50.1009, "lon": 14.4313},
|
|
||||||
{"slug": "vinohradska-8", "name": "Vinohradská 8", "lat": 50.0787, "lon": 14.4342},
|
|
||||||
{"slug": "bydleni-na-vysinach", "name": "Bydlení Na Výšinách", "lat": 50.1003, "lon": 14.4187},
|
|
||||||
{"slug": "bydleni-u-pekaren", "name": "Bydlení U Pekáren", "lat": 50.0555, "lon": 14.5414},
|
|
||||||
{"slug": "pechackova-6", "name": "Pechackova 6", "lat": 50.0734, "lon": 14.4063},
|
|
||||||
{"slug": "ahoj-vanguard", "name": "Ahoj Vanguard", "lat": 50.0164, "lon": 14.4033},
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_url(url: str) -> str:
|
def fetch_json(url: str) -> dict:
|
||||||
"""Fetch URL via curl (urllib SSL too old for Cloudflare)."""
|
"""Fetch JSON via curl (urllib SSL may fail on Cloudflare)."""
|
||||||
|
logger.debug(f"HTTP GET: {url}")
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
["curl", "-s", "-L", "--max-time", "30",
|
["curl", "-s", "-L", "--max-time", "30",
|
||||||
"-H", f"User-Agent: {UA}",
|
"-H", f"User-Agent: {UA}",
|
||||||
"-H", "Accept: text/html",
|
"-H", "Accept: application/json",
|
||||||
url],
|
url],
|
||||||
capture_output=True, text=True, timeout=60
|
capture_output=True, text=True, timeout=60
|
||||||
)
|
)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
raise RuntimeError(f"curl failed ({result.returncode}): {result.stderr[:200]}")
|
raise RuntimeError(f"curl failed ({result.returncode}): {result.stderr[:200]}")
|
||||||
return result.stdout
|
return json.loads(result.stdout)
|
||||||
|
|
||||||
|
|
||||||
def extract_units_from_html(html: str) -> list[dict]:
|
def fix_gps(lat, lng):
|
||||||
"""Extract unit JSON objects from raw HTML with escaped quotes."""
|
"""PSN má u některých projektů prohozené lat/lng — opravíme."""
|
||||||
# The HTML contains RSC data with escaped JSON: \\"key\\":\\"value\\"
|
if lat is not None and lng is not None and lat < 20 and lng > 20:
|
||||||
# Step 1: Unescape the double-backslash-quotes to regular quotes
|
return lng, lat
|
||||||
cleaned = html.replace('\\"', '"')
|
return lat, lng
|
||||||
|
|
||||||
# Step 2: Find each unit by looking for "title":"Byt and walking back to {
|
|
||||||
units = []
|
|
||||||
decoder = json.JSONDecoder()
|
|
||||||
|
|
||||||
for m in re.finditer(r'"title":"Byt', cleaned):
|
|
||||||
pos = m.start()
|
|
||||||
# Walk backwards to find the opening brace
|
|
||||||
depth = 0
|
|
||||||
found = False
|
|
||||||
for i in range(pos - 1, max(pos - 3000, 0), -1):
|
|
||||||
if cleaned[i] == '}':
|
|
||||||
depth += 1
|
|
||||||
elif cleaned[i] == '{':
|
|
||||||
if depth == 0:
|
|
||||||
try:
|
|
||||||
obj, end = decoder.raw_decode(cleaned, i)
|
|
||||||
if isinstance(obj, dict) and 'price_czk' in obj:
|
|
||||||
units.append(obj)
|
|
||||||
found = True
|
|
||||||
except (json.JSONDecodeError, ValueError):
|
|
||||||
pass
|
|
||||||
break
|
|
||||||
depth -= 1
|
|
||||||
|
|
||||||
return units
|
|
||||||
|
|
||||||
|
|
||||||
def format_price(price: int) -> str:
|
def format_price(price: int) -> str:
|
||||||
@@ -101,197 +66,203 @@ def format_price(price: int) -> str:
|
|||||||
return " ".join(reversed(parts)) + " Kč"
|
return " ".join(reversed(parts)) + " Kč"
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def scrape(max_properties: int | None = None):
|
||||||
print("=" * 60)
|
logger.info("=" * 60)
|
||||||
print("Stahuji inzeráty z PSN.cz")
|
logger.info("Stahuji inzeráty z PSN.cz")
|
||||||
print(f"Cena: do {format_price(MAX_PRICE)}")
|
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
|
||||||
print(f"Min. plocha: {MIN_AREA} m²")
|
logger.info(f"Min. plocha: {MIN_AREA} m²")
|
||||||
print(f"Patro: od {MIN_FLOOR}. NP")
|
logger.info(f"Patro: od {MIN_FLOOR}. NP")
|
||||||
print(f"Region: Praha ({len(PRAGUE_PROJECTS)} projektů)")
|
logger.info(f"Region: Praha")
|
||||||
print("=" * 60)
|
if max_properties:
|
||||||
|
logger.info(f"Max. bytů: {max_properties}")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
# Fetch units from each Prague project
|
# Jediný API požadavek — vrátí všechny jednotky (cca 236)
|
||||||
all_units = []
|
params = urlencode({
|
||||||
|
"locale": "cs",
|
||||||
|
"filters": "{}",
|
||||||
|
"type": "list",
|
||||||
|
"order": "price-asc",
|
||||||
|
"offset": 0,
|
||||||
|
"limit": 500,
|
||||||
|
})
|
||||||
|
url = f"{UNITS_API}?{params}"
|
||||||
|
logger.info("Stahuji jednotky z API ...")
|
||||||
|
|
||||||
for proj in PRAGUE_PROJECTS:
|
try:
|
||||||
page = 1
|
data = fetch_json(url)
|
||||||
project_units = []
|
except Exception as e:
|
||||||
|
logger.error(f"Chyba při stahování: {e}", exc_info=True)
|
||||||
|
return []
|
||||||
|
|
||||||
while True:
|
all_units = data.get("units", {}).get("data", [])
|
||||||
url = f"{BASE_URL}/projekt/{proj['slug']}?page={page}"
|
logger.info(f"Staženo jednotek celkem: {len(all_units)}")
|
||||||
print(f" {proj['name']} — strana {page} ...")
|
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
try:
|
# Filtrování
|
||||||
html = fetch_url(url)
|
|
||||||
except Exception as e:
|
|
||||||
print(f" Chyba: {e}")
|
|
||||||
break
|
|
||||||
|
|
||||||
units = extract_units_from_html(html)
|
|
||||||
|
|
||||||
if not units:
|
|
||||||
if page == 1:
|
|
||||||
print(f" → 0 jednotek")
|
|
||||||
break
|
|
||||||
|
|
||||||
# Add project info to each unit
|
|
||||||
for unit in units:
|
|
||||||
if not unit.get("latitude") or not unit.get("longitude"):
|
|
||||||
unit["latitude"] = proj["lat"]
|
|
||||||
unit["longitude"] = proj["lon"]
|
|
||||||
unit["_project_name"] = proj["name"]
|
|
||||||
unit["_project_slug"] = proj["slug"]
|
|
||||||
|
|
||||||
project_units.extend(units)
|
|
||||||
|
|
||||||
if page == 1:
|
|
||||||
print(f" → {len(units)} jednotek na stránce")
|
|
||||||
|
|
||||||
# Check if there might be more pages
|
|
||||||
# If we got fewer than expected or same units, stop
|
|
||||||
if len(units) < 10:
|
|
||||||
break
|
|
||||||
|
|
||||||
page += 1
|
|
||||||
if page > 10: # Safety limit
|
|
||||||
break
|
|
||||||
|
|
||||||
all_units.extend(project_units)
|
|
||||||
|
|
||||||
# Deduplicate by slug
|
|
||||||
seen_slugs = set()
|
|
||||||
unique_units = []
|
|
||||||
for u in all_units:
|
|
||||||
slug = u.get("slug", "")
|
|
||||||
if slug and slug not in seen_slugs:
|
|
||||||
seen_slugs.add(slug)
|
|
||||||
unique_units.append(u)
|
|
||||||
elif not slug:
|
|
||||||
unique_units.append(u)
|
|
||||||
|
|
||||||
print(f"\n Staženo celkem: {len(unique_units)} unikátních jednotek")
|
|
||||||
|
|
||||||
# Filter
|
|
||||||
print(f"\nFiltrování...")
|
|
||||||
results = []
|
results = []
|
||||||
excluded_sold = 0
|
excluded = {
|
||||||
excluded_type = 0
|
"prodáno": 0,
|
||||||
excluded_disp = 0
|
"typ": 0,
|
||||||
excluded_price = 0
|
"město": 0,
|
||||||
excluded_area = 0
|
"dispozice": 0,
|
||||||
excluded_floor = 0
|
"cena": 0,
|
||||||
excluded_panel = 0
|
"plocha": 0,
|
||||||
|
"patro": 0,
|
||||||
|
}
|
||||||
|
properties_fetched = 0
|
||||||
|
|
||||||
for unit in unique_units:
|
for unit in all_units:
|
||||||
# Only free units
|
if max_properties and properties_fetched >= max_properties:
|
||||||
|
break
|
||||||
|
|
||||||
|
unit_id = unit.get("id", "?")
|
||||||
|
|
||||||
|
# Pouze prodej bytů (type_id=0)
|
||||||
|
if unit.get("type_id") != 0:
|
||||||
|
excluded["typ"] += 1
|
||||||
|
logger.debug(f"id={unit_id}: přeskočen (type_id={unit.get('type_id')}, není prodej bytu)")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Pouze volné (ne rezervované, prodané, v přípravě)
|
||||||
|
sale_status = unit.get("sale_status", "")
|
||||||
is_free = unit.get("is_free", False)
|
is_free = unit.get("is_free", False)
|
||||||
is_sold = unit.get("is_sold", False)
|
is_sold = unit.get("is_sold", False)
|
||||||
if is_sold or not is_free:
|
if is_sold or not is_free:
|
||||||
excluded_sold += 1
|
excluded["prodáno"] += 1
|
||||||
|
logger.debug(f"id={unit_id}: přeskočen (status={sale_status})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Only apartments
|
# Pouze Praha
|
||||||
category = str(unit.get("category", "")).lower()
|
city = (unit.get("location") or unit.get("address", {}).get("city") or "").strip()
|
||||||
if "byt" not in category and "ateliér" not in category:
|
# location field je typicky "Praha 4", "Praha 7" atd.
|
||||||
excluded_type += 1
|
city_base = city.split(" ")[0] if city else ""
|
||||||
|
if city_base not in WANTED_CITIES:
|
||||||
|
excluded["město"] += 1
|
||||||
|
logger.debug(f"id={unit_id}: přeskočen (město={city})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Disposition
|
# Dispozice
|
||||||
disp = unit.get("disposition", "")
|
disp = unit.get("disposition", "")
|
||||||
if disp not in WANTED_DISPOSITIONS:
|
if disp not in WANTED_DISPOSITIONS:
|
||||||
excluded_disp += 1
|
excluded["dispozice"] += 1
|
||||||
|
logger.debug(f"id={unit_id}: přeskočen (dispozice={disp})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Price
|
# Cena
|
||||||
price = unit.get("price_czk") or unit.get("action_price_czk") or 0
|
price = unit.get("action_price_czk") or unit.get("price_czk") or 0
|
||||||
if price <= 0 or price > MAX_PRICE:
|
if not price or price <= 0 or price > MAX_PRICE:
|
||||||
excluded_price += 1
|
excluded["cena"] += 1
|
||||||
|
logger.debug(f"id={unit_id}: přeskočen (cena={price})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Area
|
# Plocha
|
||||||
area = unit.get("total_area") or unit.get("floor_area") or 0
|
area = unit.get("total_area") or unit.get("floor_area") or 0
|
||||||
if area < MIN_AREA:
|
if area < MIN_AREA:
|
||||||
excluded_area += 1
|
excluded["plocha"] += 1
|
||||||
|
logger.debug(f"id={unit_id}: přeskočen (plocha={area} m²)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Floor
|
# Patro
|
||||||
floor_str = str(unit.get("floor", ""))
|
floor_str = str(unit.get("floor", ""))
|
||||||
floor = None
|
floor = None
|
||||||
if floor_str:
|
if floor_str:
|
||||||
try:
|
try:
|
||||||
floor = int(floor_str)
|
floor = int(floor_str)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
floor_match = re.search(r'(-?\d+)', floor_str)
|
m = re.search(r'(-?\d+)', floor_str)
|
||||||
if floor_match:
|
if m:
|
||||||
floor = int(floor_match.group(1))
|
floor = int(m.group(1))
|
||||||
|
|
||||||
if floor is not None and floor < MIN_FLOOR:
|
if floor is not None and floor < MIN_FLOOR:
|
||||||
excluded_floor += 1
|
excluded["patro"] += 1
|
||||||
|
logger.debug(f"id={unit_id}: přeskočen (patro={floor})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Construction — check for panel
|
# GPS — opravit prohozené souřadnice
|
||||||
build_type = str(unit.get("build_type", "")).lower()
|
lat_raw = unit.get("latitude")
|
||||||
if "panel" in build_type:
|
lng_raw = unit.get("longitude")
|
||||||
excluded_panel += 1
|
lat, lng = fix_gps(lat_raw, lng_raw)
|
||||||
print(f" ✗ Vyloučen: panel ({build_type})")
|
if not lat or not lng:
|
||||||
|
logger.warning(f"id={unit_id}: chybí GPS souřadnice, přeskakuji")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Build construction label
|
# Sestavit adresu pro locality
|
||||||
building_type = "neuvedeno"
|
addr = unit.get("address") or {}
|
||||||
if build_type and build_type != "nevybráno":
|
street = addr.get("street", "")
|
||||||
if "cihlo" in build_type or "cihla" in build_type:
|
street_no = addr.get("street_no", "")
|
||||||
building_type = "Cihlová"
|
if street and street_no:
|
||||||
elif "skelet" in build_type:
|
locality_str = f"{street} {street_no}, {city}"
|
||||||
building_type = "Skeletová"
|
elif street:
|
||||||
else:
|
locality_str = f"{street}, {city}"
|
||||||
building_type = build_type.capitalize()
|
else:
|
||||||
|
project_name = unit.get("project", "")
|
||||||
|
locality_str = f"{project_name}, {city}" if project_name else city
|
||||||
|
|
||||||
lat = unit.get("latitude", 0)
|
# URL na detail jednotky
|
||||||
lon = unit.get("longitude", 0)
|
unit_slug = unit.get("slug", "")
|
||||||
|
project_slug = ""
|
||||||
slug = unit.get("slug", "")
|
# project_slug lze odvodit z projektu nebo z reference_no
|
||||||
project_slug = unit.get("_project_slug", "")
|
# API nevrací project_slug přímo — použijeme reference_no nebo jen ID
|
||||||
detail_url = f"{BASE_URL}/projekt/{project_slug}/{slug}" if slug else f"{BASE_URL}/projekt/{project_slug}"
|
reference_no = unit.get("reference_no", "")
|
||||||
|
if unit_slug:
|
||||||
|
detail_url = f"{BASE_URL}/prodej/{unit_slug}"
|
||||||
|
elif reference_no:
|
||||||
|
detail_url = f"{BASE_URL}/prodej/{reference_no}"
|
||||||
|
else:
|
||||||
|
detail_url = BASE_URL
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"hash_id": unit.get("id", slug),
|
"hash_id": str(unit_id),
|
||||||
"name": f"Prodej bytu {disp} {area} m² — {unit.get('_project_name', '')}",
|
"name": f"Prodej bytu {disp}, {int(area)} m² — {unit.get('project', locality_str)}",
|
||||||
"price": int(price),
|
"price": int(price),
|
||||||
"price_formatted": format_price(int(price)),
|
"price_formatted": format_price(int(price)),
|
||||||
"locality": f"{unit.get('street', unit.get('_project_name', ''))}, Praha",
|
"locality": locality_str,
|
||||||
"lat": lat,
|
"lat": lat,
|
||||||
"lon": lon,
|
"lon": lng,
|
||||||
"disposition": disp,
|
"disposition": disp,
|
||||||
"floor": floor,
|
"floor": floor,
|
||||||
"area": area,
|
"area": float(area),
|
||||||
"building_type": building_type,
|
"building_type": "neuvedeno",
|
||||||
"ownership": unit.get("ownership", "neuvedeno") or "neuvedeno",
|
"ownership": "osobní",
|
||||||
"url": detail_url,
|
"url": detail_url,
|
||||||
"source": "psn",
|
"source": "psn",
|
||||||
"image": "",
|
"image": "",
|
||||||
|
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
properties_fetched += 1
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
logger.info(f"\n{'=' * 60}")
|
||||||
print(f"Výsledky PSN:")
|
logger.info(f"Výsledky PSN:")
|
||||||
print(f" Celkem jednotek: {len(unique_units)}")
|
logger.info(f" Staženo inzerátů: {len(all_units)}")
|
||||||
print(f" Vyloučeno (prodáno): {excluded_sold}")
|
for reason, count in excluded.items():
|
||||||
print(f" Vyloučeno (typ): {excluded_type}")
|
if count:
|
||||||
print(f" Vyloučeno (dispozice): {excluded_disp}")
|
logger.info(f" Vyloučeno ({reason}): {count}")
|
||||||
print(f" Vyloučeno (cena): {excluded_price}")
|
logger.info(f" ✓ Vyhovující byty: {len(results)}")
|
||||||
print(f" Vyloučeno (plocha): {excluded_area}")
|
logger.info(f"{'=' * 60}")
|
||||||
print(f" Vyloučeno (patro): {excluded_floor}")
|
|
||||||
print(f" Vyloučeno (panel): {excluded_panel}")
|
|
||||||
print(f" ✓ Vyhovující byty: {len(results)}")
|
|
||||||
print(f"{'=' * 60}")
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="Scrape apartments from PSN.cz")
|
||||||
|
parser.add_argument("--max-pages", type=int, default=None,
|
||||||
|
help="Ignored — PSN uses a single API call, no pagination")
|
||||||
|
parser.add_argument("--max-properties", type=int, default=None,
|
||||||
|
help="Maximum number of properties to include in results")
|
||||||
|
parser.add_argument("--log-level", type=str, default="INFO",
|
||||||
|
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||||
|
help="Logging level (default: INFO)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, args.log_level),
|
||||||
|
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
|
||||||
|
handlers=[logging.StreamHandler()]
|
||||||
|
)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
estates = scrape()
|
estates = scrape(max_properties=args.max_properties)
|
||||||
|
|
||||||
if estates:
|
if estates:
|
||||||
json_path = Path("byty_psn.json")
|
json_path = Path("byty_psn.json")
|
||||||
@@ -300,7 +271,7 @@ if __name__ == "__main__":
|
|||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
elapsed = time.time() - start
|
elapsed = time.time() - start
|
||||||
print(f"\n✓ Data uložena: {json_path.resolve()}")
|
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
|
||||||
print(f"⏱ Celkový čas: {elapsed:.0f} s")
|
logger.info(f"⏱ Celkový čas: {elapsed:.1f} s")
|
||||||
else:
|
else:
|
||||||
print("\nŽádné byty z PSN neodpovídají kritériím :(")
|
logger.info("\nŽádné byty z PSN neodpovídají kritériím :(")
|
||||||
|
|||||||
@@ -6,13 +6,18 @@ Výstup: byty_realingo.json
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Konfigurace (sdílená se Sreality scraperem) ─────────────────────────────
|
# ── Konfigurace (sdílená se Sreality scraperem) ─────────────────────────────
|
||||||
|
|
||||||
MAX_PRICE = 13_500_000
|
MAX_PRICE = 13_500_000
|
||||||
@@ -55,44 +60,57 @@ def fetch_listing_page(page: int = 1) -> tuple[list[dict], int]:
|
|||||||
else:
|
else:
|
||||||
url = f"{BASE_URL}/prodej_byty/praha/{page}_strana/"
|
url = f"{BASE_URL}/prodej_byty/praha/{page}_strana/"
|
||||||
|
|
||||||
|
logger.debug(f"HTTP GET request: {url}")
|
||||||
|
logger.debug(f"Headers: {HEADERS}")
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
req = urllib.request.Request(url, headers=HEADERS)
|
||||||
resp = urllib.request.urlopen(req, timeout=30)
|
|
||||||
html = resp.read().decode("utf-8")
|
|
||||||
|
|
||||||
match = re.search(
|
|
||||||
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
|
||||||
html, re.DOTALL
|
|
||||||
)
|
|
||||||
if not match:
|
|
||||||
return [], 0
|
|
||||||
|
|
||||||
data = json.loads(match.group(1))
|
|
||||||
offer_list = data["props"]["pageProps"]["store"]["offer"]["list"]
|
|
||||||
return offer_list["data"], offer_list["total"]
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_detail(listing_url: str) -> dict | None:
|
|
||||||
"""Fetch detail page for a listing to get floor, building type, etc."""
|
|
||||||
try:
|
try:
|
||||||
url = f"{BASE_URL}{listing_url}"
|
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
|
||||||
resp = urllib.request.urlopen(req, timeout=30)
|
resp = urllib.request.urlopen(req, timeout=30)
|
||||||
html = resp.read().decode("utf-8")
|
html = resp.read().decode("utf-8")
|
||||||
|
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
|
||||||
|
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
||||||
html, re.DOTALL
|
html, re.DOTALL
|
||||||
)
|
)
|
||||||
if not match:
|
if not match:
|
||||||
|
logger.debug("No __NEXT_DATA__ script found in HTML")
|
||||||
|
return [], 0
|
||||||
|
|
||||||
|
data = json.loads(match.group(1))
|
||||||
|
offer_list = data["props"]["pageProps"]["store"]["offer"]["list"]
|
||||||
|
logger.debug(f"Page {page}: found {len(offer_list['data'])} items, total={offer_list['total']}")
|
||||||
|
return offer_list["data"], offer_list["total"]
|
||||||
|
except (urllib.error.URLError, ConnectionError, OSError) as e:
|
||||||
|
logger.error(f"HTTP request failed for {url}: {e}", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_detail(listing_url: str) -> dict | None:
|
||||||
|
"""Fetch detail page for a listing to get floor, building type, etc."""
|
||||||
|
try:
|
||||||
|
url = f"{BASE_URL}{listing_url}"
|
||||||
|
logger.debug(f"HTTP GET request: {url}")
|
||||||
|
req = urllib.request.Request(url, headers=HEADERS)
|
||||||
|
resp = urllib.request.urlopen(req, timeout=30)
|
||||||
|
html = resp.read().decode("utf-8")
|
||||||
|
logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
|
||||||
|
|
||||||
|
match = re.search(
|
||||||
|
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
|
||||||
|
html, re.DOTALL
|
||||||
|
)
|
||||||
|
if not match:
|
||||||
|
logger.debug("No __NEXT_DATA__ script found in detail page")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
data = json.loads(match.group(1))
|
data = json.loads(match.group(1))
|
||||||
details = data["props"]["pageProps"]["store"]["offer"]["details"]
|
details = data["props"]["pageProps"]["store"]["offer"]["details"]
|
||||||
# Get first (only) detail entry
|
# Get first (only) detail entry
|
||||||
for detail_data in details.values():
|
for detail_data in details.values():
|
||||||
|
logger.debug(f"Detail fetched for {listing_url}")
|
||||||
return detail_data
|
return detail_data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" Warning: detail fetch failed for {listing_url}: {e}")
|
logger.warning(f"Detail fetch failed for {listing_url}: {e}", exc_info=True)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -117,34 +135,42 @@ def load_cache(json_path: str = "byty_realingo.json") -> dict[int, dict]:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def scrape(max_pages: int | None = None, max_properties: int | None = None):
|
||||||
cache = load_cache()
|
cache = load_cache()
|
||||||
|
|
||||||
print("=" * 60)
|
logger.info("=" * 60)
|
||||||
print("Stahuji inzeráty z Realingo.cz")
|
logger.info("Stahuji inzeráty z Realingo.cz")
|
||||||
print(f"Cena: do {format_price(MAX_PRICE)}")
|
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
|
||||||
print(f"Min. plocha: {MIN_AREA} m²")
|
logger.info(f"Min. plocha: {MIN_AREA} m²")
|
||||||
print(f"Patro: od {MIN_FLOOR}. NP")
|
logger.info(f"Patro: od {MIN_FLOOR}. NP")
|
||||||
print(f"Region: Praha")
|
logger.info(f"Region: Praha")
|
||||||
if cache:
|
if cache:
|
||||||
print(f"Cache: {len(cache)} bytů z minulého běhu")
|
logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
|
||||||
print("=" * 60)
|
if max_pages:
|
||||||
|
logger.info(f"Max. stran: {max_pages}")
|
||||||
|
if max_properties:
|
||||||
|
logger.info(f"Max. bytů: {max_properties}")
|
||||||
|
logger.info("=" * 60)
|
||||||
|
|
||||||
# Step 1: Fetch all listing pages
|
# Step 1: Fetch all listing pages
|
||||||
print("\nFáze 1: Stahování seznamu inzerátů...")
|
logger.info("\nFáze 1: Stahování seznamu inzerátů...")
|
||||||
all_listings = []
|
all_listings = []
|
||||||
page = 1
|
page = 1
|
||||||
total = None
|
total = None
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
print(f" Strana {page} ...")
|
if max_pages and page > max_pages:
|
||||||
|
logger.debug(f"Max pages limit reached: {max_pages}")
|
||||||
|
break
|
||||||
|
logger.info(f"Strana {page} ...")
|
||||||
items, total_count = fetch_listing_page(page)
|
items, total_count = fetch_listing_page(page)
|
||||||
if total is None:
|
if total is None:
|
||||||
total = total_count
|
total = total_count
|
||||||
total_pages = math.ceil(total / PER_PAGE)
|
total_pages = math.ceil(total / PER_PAGE)
|
||||||
print(f" → Celkem {total} inzerátů, {total_pages} stran")
|
logger.info(f"→ Celkem {total} inzerátů, {total_pages} stran")
|
||||||
|
|
||||||
if not items:
|
if not items:
|
||||||
|
logger.debug(f"No items found on page {page}, stopping")
|
||||||
break
|
break
|
||||||
|
|
||||||
all_listings.extend(items)
|
all_listings.extend(items)
|
||||||
@@ -153,7 +179,7 @@ def scrape():
|
|||||||
break
|
break
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
print(f"\n Staženo: {len(all_listings)} inzerátů")
|
logger.info(f"\nStaženo: {len(all_listings)} inzerátů")
|
||||||
|
|
||||||
# Step 2: Pre-filter by category, price, area from listing data
|
# Step 2: Pre-filter by category, price, area from listing data
|
||||||
pre_filtered = []
|
pre_filtered = []
|
||||||
@@ -163,50 +189,60 @@ def scrape():
|
|||||||
excluded_no_gps = 0
|
excluded_no_gps = 0
|
||||||
|
|
||||||
for item in all_listings:
|
for item in all_listings:
|
||||||
|
item_id = item.get("id")
|
||||||
cat = item.get("category", "")
|
cat = item.get("category", "")
|
||||||
if cat not in WANTED_CATEGORIES:
|
if cat not in WANTED_CATEGORIES:
|
||||||
excluded_category += 1
|
excluded_category += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (category {cat})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
price = item.get("price", {}).get("total", 0) or 0
|
price = item.get("price", {}).get("total", 0) or 0
|
||||||
if price > MAX_PRICE or price == 0:
|
if price > MAX_PRICE or price == 0:
|
||||||
excluded_price += 1
|
excluded_price += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (price {price})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
area = item.get("area", {}).get("main")
|
area = item.get("area", {}).get("main")
|
||||||
if area is not None and area < MIN_AREA:
|
if area is not None and area < MIN_AREA:
|
||||||
excluded_area += 1
|
excluded_area += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (area {area} m²)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
loc = item.get("location", {})
|
loc = item.get("location", {})
|
||||||
if not loc.get("latitude") or not loc.get("longitude"):
|
if not loc.get("latitude") or not loc.get("longitude"):
|
||||||
excluded_no_gps += 1
|
excluded_no_gps += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (no GPS)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
pre_filtered.append(item)
|
pre_filtered.append(item)
|
||||||
|
|
||||||
print(f"\nPo předfiltraci:")
|
logger.info(f"\nPo předfiltraci:")
|
||||||
print(f" Vyloučeno (dispozice): {excluded_category}")
|
logger.info(f" Vyloučeno (dispozice): {excluded_category}")
|
||||||
print(f" Vyloučeno (cena): {excluded_price}")
|
logger.info(f" Vyloučeno (cena): {excluded_price}")
|
||||||
print(f" Vyloučeno (plocha): {excluded_area}")
|
logger.info(f" Vyloučeno (plocha): {excluded_area}")
|
||||||
print(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
|
||||||
print(f" Zbývá: {len(pre_filtered)}")
|
logger.info(f" Zbývá: {len(pre_filtered)}")
|
||||||
|
|
||||||
# Step 3: Fetch details for remaining listings (floor, building type)
|
# Step 3: Fetch details for remaining listings (floor, building type)
|
||||||
print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
|
logger.info(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
|
||||||
results = []
|
results = []
|
||||||
excluded_panel = 0
|
excluded_panel = 0
|
||||||
excluded_floor = 0
|
excluded_floor = 0
|
||||||
excluded_detail = 0
|
excluded_detail = 0
|
||||||
cache_hits = 0
|
cache_hits = 0
|
||||||
|
properties_fetched = 0
|
||||||
|
|
||||||
for i, item in enumerate(pre_filtered):
|
for i, item in enumerate(pre_filtered):
|
||||||
|
if max_properties and properties_fetched >= max_properties:
|
||||||
|
logger.debug(f"Max properties limit reached: {max_properties}")
|
||||||
|
break
|
||||||
# Check cache — if hash_id exists and price unchanged, reuse
|
# Check cache — if hash_id exists and price unchanged, reuse
|
||||||
item_id = int(item["id"])
|
item_id = int(item["id"])
|
||||||
item_price = item.get("price", {}).get("total", 0) or 0
|
item_price = item.get("price", {}).get("total", 0) or 0
|
||||||
cached = cache.get(item_id)
|
cached = cache.get(item_id)
|
||||||
if cached and cached.get("price") == item_price:
|
if cached and cached.get("price") == item_price:
|
||||||
cache_hits += 1
|
cache_hits += 1
|
||||||
|
logger.debug(f"Cache hit for id={item_id}")
|
||||||
results.append(cached)
|
results.append(cached)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -215,6 +251,7 @@ def scrape():
|
|||||||
|
|
||||||
if not detail_data:
|
if not detail_data:
|
||||||
excluded_detail += 1
|
excluded_detail += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (detail fetch failed)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
detail = detail_data.get("offer", {}).get("detail", {})
|
detail = detail_data.get("offer", {}).get("detail", {})
|
||||||
@@ -225,20 +262,23 @@ def scrape():
|
|||||||
building_type = detail.get("buildingType", "")
|
building_type = detail.get("buildingType", "")
|
||||||
if building_type == "PANEL":
|
if building_type == "PANEL":
|
||||||
excluded_panel += 1
|
excluded_panel += 1
|
||||||
print(f" ✗ Vyloučen #{item['id']}: panel")
|
logger.debug(f"Filter: id={item['id']} - excluded (panel construction)")
|
||||||
|
logger.info(f"✗ Vyloučen #{item['id']}: panel")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check building position — exclude sídliště
|
# Check building position — exclude sídliště
|
||||||
building_position = detail.get("buildingPosition", "")
|
building_position = detail.get("buildingPosition", "")
|
||||||
if building_position and "ESTATE" in str(building_position).upper():
|
if building_position and "ESTATE" in str(building_position).upper():
|
||||||
excluded_panel += 1
|
excluded_panel += 1
|
||||||
print(f" ✗ Vyloučen #{item['id']}: sídliště")
|
logger.debug(f"Filter: id={item['id']} - excluded (building estate)")
|
||||||
|
logger.info(f"✗ Vyloučen #{item['id']}: sídliště")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check floor
|
# Check floor
|
||||||
floor = detail.get("floor")
|
floor = detail.get("floor")
|
||||||
if floor is not None and floor < MIN_FLOOR:
|
if floor is not None and floor < MIN_FLOOR:
|
||||||
excluded_floor += 1
|
excluded_floor += 1
|
||||||
|
logger.debug(f"Filter: id={item_id} - excluded (floor {floor})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Map building type
|
# Map building type
|
||||||
@@ -275,28 +315,46 @@ def scrape():
|
|||||||
"url": f"{BASE_URL}{item['url']}",
|
"url": f"{BASE_URL}{item['url']}",
|
||||||
"source": "realingo",
|
"source": "realingo",
|
||||||
"image": "",
|
"image": "",
|
||||||
|
"scraped_at": datetime.now().strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
properties_fetched += 1
|
||||||
|
|
||||||
if (i + 1) % 20 == 0:
|
if (i + 1) % 20 == 0:
|
||||||
print(f" Zpracováno {i + 1}/{len(pre_filtered)} ...")
|
logger.info(f"Zpracováno {i + 1}/{len(pre_filtered)} ...")
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
logger.info(f"\n{'=' * 60}")
|
||||||
print(f"Výsledky Realingo:")
|
logger.info(f"Výsledky Realingo:")
|
||||||
print(f" Předfiltrováno: {len(pre_filtered)}")
|
logger.info(f" Předfiltrováno: {len(pre_filtered)}")
|
||||||
print(f" Z cache (přeskočeno): {cache_hits}")
|
logger.info(f" Z cache (přeskočeno): {cache_hits}")
|
||||||
print(f" Vyloučeno (panel/síd): {excluded_panel}")
|
logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
|
||||||
print(f" Vyloučeno (patro): {excluded_floor}")
|
logger.info(f" Vyloučeno (patro): {excluded_floor}")
|
||||||
print(f" Vyloučeno (bez detailu): {excluded_detail}")
|
logger.info(f" Vyloučeno (bez detailu): {excluded_detail}")
|
||||||
print(f" ✓ Vyhovující byty: {len(results)}")
|
logger.info(f" ✓ Vyhovující byty: {len(results)}")
|
||||||
print(f"{'=' * 60}")
|
logger.info(f"{'=' * 60}")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="Scrape apartments from Realingo.cz")
|
||||||
|
parser.add_argument("--max-pages", type=int, default=None,
|
||||||
|
help="Maximum number of listing pages to scrape")
|
||||||
|
parser.add_argument("--max-properties", type=int, default=None,
|
||||||
|
help="Maximum number of properties to fetch details for")
|
||||||
|
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||||
|
help="Logging level (default: INFO)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, args.log_level),
|
||||||
|
format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
|
||||||
|
handlers=[logging.StreamHandler()]
|
||||||
|
)
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
estates = scrape()
|
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
|
||||||
|
|
||||||
if estates:
|
if estates:
|
||||||
json_path = Path("byty_realingo.json")
|
json_path = Path("byty_realingo.json")
|
||||||
@@ -305,7 +363,7 @@ if __name__ == "__main__":
|
|||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
elapsed = time.time() - start
|
elapsed = time.time() - start
|
||||||
print(f"\n✓ Data uložena: {json_path.resolve()}")
|
logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
|
||||||
print(f"⏱ Celkový čas: {elapsed:.0f} s")
|
logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
|
||||||
else:
|
else:
|
||||||
print("\nŽádné byty z Realinga neodpovídají kritériím :(")
|
logger.info("\nŽádné byty z Realinga neodpovídají kritériím :(")
|
||||||
|
|||||||
119
server.py
Normal file
119
server.py
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Combined HTTP server: serves static files from DATA_DIR and
|
||||||
|
provides the ratings API at /api/ratings.
|
||||||
|
|
||||||
|
GET /api/ratings → returns ratings.json contents
|
||||||
|
POST /api/ratings → saves entire ratings object
|
||||||
|
GET /api/ratings/export → same as GET, with Content-Disposition: attachment
|
||||||
|
GET /<path> → serves static file from DATA_DIR
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from flask import Flask, jsonify, request, send_from_directory
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description="Flat-search map server")
|
||||||
|
parser.add_argument("--log-level", "-l", default=None, choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Log level (default: INFO)")
|
||||||
|
parser.add_argument("--verbose", "-v", action="store_true", help="Shorthand for --log-level DEBUG")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
log_level = logging.DEBUG if args.verbose else getattr(logging, args.log_level or "INFO")
|
||||||
|
|
||||||
|
PORT = int(os.environ.get("PORT", 8080))
|
||||||
|
DATA_DIR = Path(os.environ.get("DATA_DIR", ".")).resolve()
|
||||||
|
RATINGS_FILE = DATA_DIR / "ratings.json"
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=log_level,
|
||||||
|
format="%(asctime)s [server] %(levelname)s %(message)s",
|
||||||
|
datefmt="%Y-%m-%dT%H:%M:%S",
|
||||||
|
)
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
app = Flask(__name__, static_folder=None)
|
||||||
|
app.json.ensure_ascii = False
|
||||||
|
|
||||||
|
|
||||||
|
@app.after_request
|
||||||
|
def add_cors(response):
|
||||||
|
response.headers["Access-Control-Allow-Origin"] = "*"
|
||||||
|
response.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS"
|
||||||
|
response.headers["Access-Control-Allow-Headers"] = "Content-Type"
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def load_ratings() -> dict:
|
||||||
|
try:
|
||||||
|
if RATINGS_FILE.exists():
|
||||||
|
return json.loads(RATINGS_FILE.read_text(encoding="utf-8"))
|
||||||
|
except Exception as e:
|
||||||
|
log.error("Failed to load ratings: %s", e)
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def save_ratings(data: dict) -> None:
|
||||||
|
RATINGS_FILE.write_text(
|
||||||
|
json.dumps(data, ensure_ascii=False, indent=2),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/ratings", methods=["OPTIONS"])
|
||||||
|
@app.route("/api/ratings/export", methods=["OPTIONS"])
|
||||||
|
def ratings_options():
|
||||||
|
return ("", 204)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/ratings", methods=["GET"])
|
||||||
|
def get_ratings():
|
||||||
|
ratings = load_ratings()
|
||||||
|
log.info("GET /api/ratings → %d ratings", len(ratings))
|
||||||
|
return jsonify(ratings)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/ratings/export", methods=["GET"])
|
||||||
|
def export_ratings():
|
||||||
|
ratings = load_ratings()
|
||||||
|
log.info("GET /api/ratings/export → %d ratings", len(ratings))
|
||||||
|
response = jsonify(ratings)
|
||||||
|
response.headers["Content-Disposition"] = 'attachment; filename="ratings.json"'
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/ratings", methods=["POST"])
|
||||||
|
def post_ratings():
|
||||||
|
length = request.content_length
|
||||||
|
if not length:
|
||||||
|
return jsonify({"error": "empty body"}), 400
|
||||||
|
try:
|
||||||
|
data = request.get_json(force=True, silent=False)
|
||||||
|
except Exception as e:
|
||||||
|
log.warning("Bad request body: %s", e)
|
||||||
|
return jsonify({"error": "invalid JSON"}), 400
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
return jsonify({"error": "expected JSON object"}), 400
|
||||||
|
save_ratings(data)
|
||||||
|
log.info("POST /api/ratings → saved %d ratings", len(data))
|
||||||
|
return jsonify({"ok": True, "count": len(data)})
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/")
|
||||||
|
def index():
|
||||||
|
return send_from_directory(str(DATA_DIR), "mapa_bytu.html")
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/<path:filename>")
|
||||||
|
def static_files(filename):
|
||||||
|
return send_from_directory(str(DATA_DIR), filename)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
log.info("Server starting on port %d, data dir: %s", PORT, DATA_DIR)
|
||||||
|
log.info("Ratings file: %s", RATINGS_FILE)
|
||||||
|
app.run(host="0.0.0.0", port=PORT)
|
||||||
204
status.html
Normal file
204
status.html
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="cs">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Scraper status</title>
|
||||||
|
<style>
|
||||||
|
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||||
|
body {
|
||||||
|
font-family: system-ui, -apple-system, sans-serif;
|
||||||
|
background: #f5f5f5; color: #333;
|
||||||
|
padding: 24px; max-width: 640px; margin: 0 auto;
|
||||||
|
}
|
||||||
|
h1 { font-size: 22px; margin-bottom: 4px; }
|
||||||
|
.subtitle { color: #888; font-size: 13px; margin-bottom: 24px; }
|
||||||
|
.card {
|
||||||
|
background: white; border-radius: 12px; padding: 20px;
|
||||||
|
box-shadow: 0 1px 4px rgba(0,0,0,0.08); margin-bottom: 16px;
|
||||||
|
}
|
||||||
|
.card h2 { font-size: 15px; margin-bottom: 12px; color: #555; }
|
||||||
|
.timestamp {
|
||||||
|
font-size: 28px; font-weight: 700; color: #1976D2;
|
||||||
|
}
|
||||||
|
.timestamp-ago { font-size: 13px; color: #999; margin-top: 2px; }
|
||||||
|
|
||||||
|
/* Source table */
|
||||||
|
.source-table { width: 100%; border-collapse: collapse; }
|
||||||
|
.source-table td { padding: 8px 0; border-bottom: 1px solid #f0f0f0; font-size: 14px; }
|
||||||
|
.source-table tr:last-child td { border-bottom: none; }
|
||||||
|
.source-table .name { font-weight: 600; }
|
||||||
|
.source-table .count { text-align: right; font-variant-numeric: tabular-nums; }
|
||||||
|
.source-table .rejected { text-align: right; color: #999; font-size: 12px; }
|
||||||
|
.badge {
|
||||||
|
display: inline-block; padding: 2px 8px; border-radius: 4px;
|
||||||
|
font-size: 11px; font-weight: 600; color: white;
|
||||||
|
}
|
||||||
|
.badge-ok { background: #4CAF50; }
|
||||||
|
.badge-err { background: #F44336; }
|
||||||
|
.badge-skip { background: #FF9800; }
|
||||||
|
|
||||||
|
/* Summary bar */
|
||||||
|
.summary-row {
|
||||||
|
display: flex; justify-content: space-between; align-items: center;
|
||||||
|
padding: 10px 0; border-bottom: 1px solid #f0f0f0;
|
||||||
|
}
|
||||||
|
.summary-row:last-child { border-bottom: none; }
|
||||||
|
.summary-label { font-size: 13px; color: #666; }
|
||||||
|
.summary-value { font-size: 18px; font-weight: 700; }
|
||||||
|
|
||||||
|
/* Source bar chart */
|
||||||
|
.bar-row { display: flex; align-items: center; gap: 8px; margin: 4px 0; }
|
||||||
|
.bar-label { width: 90px; font-size: 12px; text-align: right; color: #666; }
|
||||||
|
.bar-track { flex: 1; height: 20px; background: #f0f0f0; border-radius: 4px; overflow: hidden; position: relative; }
|
||||||
|
.bar-fill { height: 100%; border-radius: 4px; transition: width 0.5s ease; }
|
||||||
|
.bar-count { font-size: 12px; width: 36px; font-variant-numeric: tabular-nums; }
|
||||||
|
|
||||||
|
/* Loader */
|
||||||
|
.loader-wrap {
|
||||||
|
display: flex; flex-direction: column; align-items: center;
|
||||||
|
justify-content: center; padding: 60px 0;
|
||||||
|
}
|
||||||
|
.spinner {
|
||||||
|
width: 40px; height: 40px; border: 4px solid #e0e0e0;
|
||||||
|
border-top-color: #1976D2; border-radius: 50%;
|
||||||
|
animation: spin 0.8s linear infinite;
|
||||||
|
}
|
||||||
|
@keyframes spin { to { transform: rotate(360deg); } }
|
||||||
|
.loader-text { margin-top: 16px; color: #999; font-size: 14px; }
|
||||||
|
|
||||||
|
.error-msg { color: #F44336; padding: 40px 0; text-align: center; }
|
||||||
|
.link-row { text-align: center; margin-top: 8px; }
|
||||||
|
.link-row a { color: #1976D2; text-decoration: none; font-size: 14px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Scraper status</h1>
|
||||||
|
<div class="subtitle">maru-hleda-byt</div>
|
||||||
|
|
||||||
|
<div id="content">
|
||||||
|
<div class="loader-wrap">
|
||||||
|
<div class="spinner"></div>
|
||||||
|
<div class="loader-text">Nacitam status...</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="link-row"><a href="mapa_bytu.html">Otevrit mapu</a></div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Accent colour per scraper source; looked up by the lower-cased
// source name coming from status.json (unknown sources fall back
// to grey at the lookup site).
var COLORS = {
  sreality:    '#1976D2',  // blue
  realingo:    '#7B1FA2',  // purple
  bezrealitky: '#E65100',  // orange
  idnes:       '#C62828',  // red
  psn:         '#2E7D32',  // green
  cityhome:    '#00838F'   // teal
};
|
||||||
|
|
||||||
|
// Czech "time since" label for a date string: 'prave ted' under a
// minute, then minutes / hours / days elapsed. Same cascade as a
// plain threshold ladder, so an unparseable date falls through every
// comparison and lands in the days branch, as before.
function timeAgo(dateStr) {
  var seconds = Math.floor((Date.now() - new Date(dateStr).getTime()) / 1000);
  if (seconds < 60) return 'prave ted';
  if (seconds < 3600) return Math.floor(seconds / 60) + ' min zpet';
  if (seconds < 86400) return Math.floor(seconds / 3600) + ' hod zpet';
  return Math.floor(seconds / 86400) + ' dni zpet';
}
|
||||||
|
|
||||||
|
// Format a date string as e.g. "5. brezna 2024, 07:08" (Czech genitive
// month names, local time, zero-padded HH:MM).
function formatDate(dateStr) {
  var MONTHS = ['ledna','unora','brezna','dubna','kvetna','cervna',
                'cervence','srpna','zari','rijna','listopadu','prosince'];
  var when = new Date(dateStr);
  var pad = function (n) { return String(n).padStart(2, '0'); };
  return when.getDate() + '. ' + MONTHS[when.getMonth()] + ' ' +
         when.getFullYear() + ', ' +
         pad(when.getHours()) + ':' + pad(when.getMinutes());
}
|
||||||
|
|
||||||
|
// Render the dashboard from the parsed status.json payload.
// Shows a spinner (and re-polls) while a scrape is running; otherwise
// builds three cards: last-scrape timestamp, totals summary, per-source
// bar chart.
function render(data) {
  // status.json fields end up inside innerHTML; escape free-text values
  // so a stray '<' in the data cannot inject markup.
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');
  }

  // Scrape in progress: show spinner and poll again in 30 s.
  if (data.status === 'running') {
    document.getElementById('content').innerHTML =
      '<div class="loader-wrap">' +
      '<div class="spinner"></div>' +
      '<div class="loader-text">Scraper prave bezi...</div>' +
      '</div>';
    setTimeout(loadStatus, 30000);
    return;
  }

  var sources = data.sources || [];
  var totalOk = 0, totalRej = 0;
  var maxCount = 0;  // largest accepted count, used to scale the bars
  sources.forEach(function(s) {
    totalOk += s.accepted || 0;
    totalRej += s.rejected || 0;
    if (s.accepted > maxCount) maxCount = s.accepted;
  });

  var html = '';

  // Timestamp card
  html += '<div class="card">';
  html += '<h2>Posledni scrape</h2>';
  html += '<div class="timestamp">' + formatDate(data.timestamp) + '</div>';
  html += '<div class="timestamp-ago">' + timeAgo(data.timestamp) + '</div>';
  if (data.duration_sec) {
    html += '<div class="timestamp-ago">Trvani: ' + Math.round(data.duration_sec) + 's</div>';
  }
  html += '</div>';

  // Summary card
  html += '<div class="card">';
  html += '<h2>Souhrn</h2>';
  html += '<div class="summary-row"><span class="summary-label">Vyhovujicich bytu</span><span class="summary-value" style="color:#4CAF50">' + totalOk + '</span></div>';
  html += '<div class="summary-row"><span class="summary-label">Vyloucenych</span><span class="summary-value" style="color:#999">' + totalRej + '</span></div>';
  if (data.deduplicated !== undefined) {
    html += '<div class="summary-row"><span class="summary-label">Po deduplikaci (v mape)</span><span class="summary-value" style="color:#1976D2">' + data.deduplicated + '</span></div>';
  }
  html += '</div>';

  // Sources card: one name + badge row and one scaled bar per source
  html += '<div class="card">';
  html += '<h2>Zdroje</h2>';
  sources.forEach(function(s) {
    // NOTE(review): this assumes every source object has a string `name`;
    // a missing name would throw here — confirm against the producer.
    var color = COLORS[s.name.toLowerCase()] || '#999';
    var pct = maxCount > 0 ? Math.round((s.accepted / maxCount) * 100) : 0;
    var badge = s.error
      ? '<span class="badge badge-err">chyba</span>'
      : (s.accepted === 0 ? '<span class="badge badge-skip">0</span>' : '<span class="badge badge-ok">OK</span>');

    html += '<div style="margin-bottom:12px;">';
    html += '<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">';
    html += '<span style="font-weight:600;font-size:14px;">' + escapeHtml(s.name) + ' ' + badge + '</span>';
    html += '<span style="font-size:12px;color:#999;">' + (s.rejected || 0) + ' vyloucenych</span>';
    html += '</div>';
    html += '<div class="bar-row">';
    html += '<div class="bar-track"><div class="bar-fill" style="width:' + pct + '%;background:' + color + ';"></div></div>';
    html += '<span class="bar-count">' + (s.accepted || 0) + '</span>';
    html += '</div>';
    html += '</div>';
  });
  html += '</div>';

  document.getElementById('content').innerHTML = html;
}
|
||||||
|
|
||||||
|
// Fetch status.json (cache-busted with the current timestamp) and hand
// the parsed payload to render(); on any failure — HTTP error or network
// problem — show a friendly message with the underlying cause.
function loadStatus() {
  var url = 'status.json?t=' + Date.now();
  fetch(url)
    .then(function (response) {
      if (!response.ok) throw new Error(response.status);
      return response.json();
    })
    .then(render)
    .catch(function (problem) {
      document.getElementById('content').innerHTML =
        '<div class="error-msg">Status zatim neni k dispozici.<br><small>(' +
        problem.message + ')</small></div>';
    });
}
|
||||||
|
|
||||||
|
loadStatus();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
Reference in New Issue
Block a user