diff --git a/merge_and_map.py b/merge_and_map.py
index 1eb9406..335b758 100644
--- a/merge_and_map.py
+++ b/merge_and_map.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
-Sloučí data ze Sreality, Realinga, Bezrealitek, iDNES, PSN a CityHome,
+Sloučí data ze Sreality, Realinga, Bezrealitek, iDNES, PSN, CityHome a Bazoše,
deduplikuje a vygeneruje mapu.
Deduplikace: stejná ulice (z locality) + stejná cena + stejná plocha = duplikát.
PSN a CityHome mají při deduplikaci prioritu (načtou se první).
@@ -44,6 +44,7 @@ def main():
("Realingo", "byty_realingo.json"),
("Bezrealitky", "byty_bezrealitky.json"),
("iDNES", "byty_idnes.json"),
+ ("Bazoš", "byty_bazos.json"),
]
all_estates = []
diff --git a/run_all.sh b/run_all.sh
index 79f682d..e19b75f 100755
--- a/run_all.sh
+++ b/run_all.sh
@@ -13,7 +13,7 @@ RED='\033[0;31m'
BOLD='\033[1m'
NC='\033[0m'
-TOTAL=6
+TOTAL=7
CURRENT=0
FAILED=0
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
@@ -98,6 +98,9 @@ PID_CH=$!
wait $PID_PSN || { echo -e "${RED}✗ PSN selhalo${NC}"; FAILED=$((FAILED + 1)); }
wait $PID_CH || { echo -e "${RED}✗ CityHome selhalo${NC}"; FAILED=$((FAILED + 1)); }
+step "Bazoš"
+python3 scrape_bazos.py $SCRAPER_ARGS || { echo -e "${RED}✗ Bazoš selhalo${NC}"; FAILED=$((FAILED + 1)); }
+
step "Realingo"
python3 scrape_realingo.py $SCRAPER_ARGS || { echo -e "${RED}✗ Realingo selhalo${NC}"; FAILED=$((FAILED + 1)); }
@@ -117,7 +120,7 @@ python3 generate_status.py --start-time "$START_TIME" --duration "$DURATION" $KE
echo ""
echo "============================================================"
if [ $FAILED -eq 0 ]; then
- echo -e "${GREEN}${BOLD}Hotovo! Všech 6 zdrojů úspěšně staženo.${NC}"
+ echo -e "${GREEN}${BOLD}Hotovo! Všech 7 zdrojů úspěšně staženo.${NC}"
else
echo -e "${RED}${BOLD}Hotovo s $FAILED chybami.${NC}"
fi
diff --git a/scrape_and_map.py b/scrape_and_map.py
index 0b49717..f8ee4db 100644
--- a/scrape_and_map.py
+++ b/scrape_and_map.py
@@ -480,8 +480,8 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
         floor_note = '<br>⚠ 2. NP — zvážit klidnost lokality'
         source = e.get("source", "sreality")
-        source_labels = {"sreality": "Sreality", "realingo": "Realingo", "bezrealitky": "Bezrealitky", "idnes": "iDNES", "psn": "PSN", "cityhome": "CityHome"}
-        source_colors = {"sreality": "#1976D2", "realingo": "#00897B", "bezrealitky": "#E91E63", "idnes": "#FF6F00", "psn": "#D32F2F", "cityhome": "#D32F2F"}
+        source_labels = {"sreality": "Sreality", "realingo": "Realingo", "bezrealitky": "Bezrealitky", "idnes": "iDNES", "psn": "PSN", "cityhome": "CityHome", "bazos": "Bazoš"}
+        source_colors = {"sreality": "#1976D2", "realingo": "#00897B", "bezrealitky": "#E91E63", "idnes": "#FF6F00", "psn": "#D32F2F", "cityhome": "#D32F2F", "bazos": "#7B1FA2"}
         source_label = source_labels.get(source, source)
         source_color = source_colors.get(source, "#999")
diff --git a/scrape_bazos.py b/scrape_bazos.py
new file mode 100644
index 0000000..21091d4
--- /dev/null
+++ b/scrape_bazos.py
@@ -0,0 +1,560 @@
+#!/usr/bin/env python3
+"""
+Bazoš.cz scraper.
+Stáhne byty na prodej v Praze a vyfiltruje podle kritérií.
+Výstup: byty_bazos.json
+"""
+from __future__ import annotations
+
+import argparse
+from datetime import datetime
+import json
+import logging
+import math
+import re
+import time
+import urllib.request
+import urllib.parse
+from pathlib import Path
+from scraper_stats import write_stats, validate_listing
+
+STATS_FILE = "stats_bazos.json"
+
+logger = logging.getLogger(__name__)
+
+# ── Konfigurace ─────────────────────────────────────────────────────────────
+
+MAX_PRICE = 14_000_000
+MIN_AREA = 69
+MIN_FLOOR = 2
+PER_PAGE = 20 # Bazoš vrací 20 na stránku
+
+WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1"}
+
+# Regex patterns pro parsování dispozice, plochy a patra z textu
+DISP_RE = re.compile(r'(\d)\s*\+\s*(kk|1)', re.IGNORECASE)
+AREA_RE = re.compile(r'(\d+(?:[.,]\d+)?)\s*m[²2\s,.]', re.IGNORECASE)
+FLOOR_RE = re.compile(r'(\d+)\s*[./]\s*(\d+)\s*(?:NP|patr|podlaž|floor)', re.IGNORECASE)
+FLOOR_RE2 = re.compile(r'(\d+)\.\s*(?:NP|patr[eouě]|podlaž[ií])', re.IGNORECASE)
+FLOOR_RE3 = re.compile(r'(?:patr[eouě]|podlaž[ií]|NP)\s*[:\s]*(\d+)', re.IGNORECASE)
+PANEL_RE = re.compile(r'panel(?:ov|ák|\.)', re.IGNORECASE)
+SIDLISTE_RE = re.compile(r'sídliště|sidliste|panelák', re.IGNORECASE)
+
+HEADERS = {
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ "Accept": "text/html,application/xhtml+xml",
+ "Accept-Language": "cs,en;q=0.9",
+}
+
+BASE_URL = "https://reality.bazos.cz"
+SEARCH_PARAMS = "hledat=&rubriky=reality&hlokalita=Praha&humkreis=25&cenado={max_price}&kitx=ano"
+
+
+def fetch_url(url: str, retries: int = 3) -> str:
+ """Fetch URL and return HTML string with retry on transient errors."""
+ for attempt in range(retries):
+ try:
+ logger.debug(f"HTTP GET request (attempt {attempt + 1}/{retries}): {url}")
+ req = urllib.request.Request(url, headers=HEADERS)
+ resp = urllib.request.urlopen(req, timeout=30)
+ html = resp.read().decode("utf-8", errors="replace")
+ logger.debug(f"HTTP response: status={resp.status}, size={len(html)} bytes")
+ return html
+ except urllib.error.HTTPError:
+ raise
+ except (ConnectionResetError, ConnectionError, urllib.error.URLError, OSError) as e:
+ if attempt < retries - 1:
+ wait = (attempt + 1) * 3
+ logger.warning(f"Connection error (retry {attempt + 1}/{retries} after {wait}s): {e}")
+ time.sleep(wait)
+ else:
+ logger.error(f"HTTP request failed after {retries} attempts: {e}", exc_info=True)
+ raise
+
+
+def format_price(price: int) -> str:
+ s = str(price)
+ parts = []
+ while s:
+ parts.append(s[-3:])
+ s = s[:-3]
+ return " ".join(reversed(parts)) + " Kč"
+
+
+def parse_price(text: str) -> int:
+ """Parse price from text like '5 250 000 Kč' → 5250000."""
+ cleaned = re.sub(r'[^\d]', '', text)
+ return int(cleaned) if cleaned else 0
+
+
+def parse_disposition(text: str) -> str | None:
+ """Parse disposition from title/description like '3+kk', '4+1'."""
+ m = DISP_RE.search(text)
+ if m:
+ rooms = m.group(1)
+ suffix = m.group(2).lower()
+ return f"{rooms}+{suffix}"
+ return None
+
+
+def parse_area(text: str) -> float | None:
+ """Parse area from text like '82 m²' → 82.0."""
+ m = AREA_RE.search(text)
+ if m:
+ return float(m.group(1).replace(',', '.'))
+ return None
+
+
+def parse_floor(text: str) -> int | None:
+ """Parse floor number from description."""
+ for pattern in [FLOOR_RE, FLOOR_RE2, FLOOR_RE3]:
+ m = pattern.search(text)
+ if m:
+ return int(m.group(1))
+ return None
+
+
+def is_panel(text: str) -> bool:
+ """Check if description mentions panel construction."""
+ return bool(PANEL_RE.search(text))
+
+
+def is_sidliste(text: str) -> bool:
+ """Check if description mentions housing estate."""
+ return bool(SIDLISTE_RE.search(text))
+
+
+def fetch_listing_page(offset: int = 0, pagination_params: str | None = None) -> tuple[list[dict], int, str | None]:
+    """
+    Fetch a page of listings from Bazoš.
+    Returns (list of basic listing dicts, total count, pagination_params for next pages).
+    """
+    if pagination_params and offset > 0:
+        # Use resolved numeric params from first page's pagination links
+        url = f"{BASE_URL}/prodam/byt/{offset}/?{pagination_params}"
+    else:
+        params = SEARCH_PARAMS.format(max_price=MAX_PRICE)
+        if offset > 0:
+            url = f"{BASE_URL}/prodam/byt/{offset}/?{params}"
+        else:
+            url = f"{BASE_URL}/prodam/byt/?{params}"
+
+    html = fetch_url(url)
+
+    # Parse total count: "Zobrazeno 1-20 z 727"
+    total = 0
+    total_match = re.search(r'z\s+([\d\s]+)\s', html)
+    if total_match:
+        total = int(total_match.group(1).replace(' ', ''))
+
+    # Extract resolved pagination params from first page (Bazoš converts
+    # hlokalita=Praha → hlokalita=11000, and pagination only works with numeric form)
+    resolved_params = None
+    pag_link = re.search(r'href="/prodam/byt/\d+/\?([^"]+)"', html)
+    if pag_link:
+        resolved_params = pag_link.group(1)
+
+    # Parse listings — split by listing blocks (<div class="inzeraty inzeratyflex">)
+    listings = []
+    all_blocks = re.split(r'<div class="inzeraty inzeratyflex"', html)[1:]  # skip before first; pattern reconstructed — verify against live markup
+
+    for block in all_blocks:
+        # Extract URL and ID from first link (/inzerat/XXXXXX/slug.php)
+        url_match = re.search(r'href="(/inzerat/(\d+)/[^"]*)"', block)
+        if not url_match:
+            continue
+        detail_path = url_match.group(1)
+        listing_id = int(url_match.group(2))
+
+        # Title — class=nadpis (without quotes) or class="nadpis"
+        title_match = re.search(r'class=.?nadpis.?[^>]*>\s*<a[^>]*>([^<]+)', block)
+        title = title_match.group(1).strip() if title_match else ""
+
+        # Price — inside <b> within inzeratycena
+        price_match = re.search(r'class="inzeratycena"[^>]*>.*?<b[^>]*>([^<]+)', block, re.DOTALL)
+        if not price_match:
+            # Fallback: direct text in inzeratycena
+            price_match = re.search(r'class="inzeratycena"[^>]*>\s*(?:<b>)?([^<]+)', block)
+        price_text = price_match.group(1).strip() if price_match else ""
+        price = parse_price(price_text)
+
+        # Location
+        loc_match = re.search(r'class="inzeratylok"[^>]*>(.*?)</div>', block, re.DOTALL)
+        location = ""
+        if loc_match:
+            location = re.sub(r'<[^>]+>', ' ', loc_match.group(1)).strip()
+            location = re.sub(r'\s+', ' ', location)
+
+        # Date — [5.3. 2026]
+        date_match = re.search(r'\[(\d+\.\d+\.\s*\d{4})\]', block)
+        date_str = date_match.group(1).strip() if date_match else ""
+
+        # Description preview — class=popis (without quotes) or class="popis"
+        desc_match = re.search(r'class=.?popis.?[^>]*>(.*?)</div>', block, re.DOTALL)
+        description = ""
+        if desc_match:
+            description = re.sub(r'<[^>]+>', ' ', desc_match.group(1)).strip()
+            description = re.sub(r'\s+', ' ', description)
+
+        # Image — <img ... class="obrazek">
+        img_match = re.search(r'<img[^>]*src="([^"]+)"[^>]*class="obrazek"', block)
+        if not img_match:
+            img_match = re.search(r'class="obrazek"[^>]*src="([^"]+)"', block)
+        image = img_match.group(1) if img_match else ""
+        if "empty.gif" in image:
+            image = ""
+
+        listings.append({
+            "id": listing_id,
+            "title": title,
+            "price": price,
+            "location": location,
+            "date": date_str,
+            "description": description,
+            "detail_path": detail_path,
+            "image": image,
+        })
+
+    logger.debug(f"Offset {offset}: found {len(listings)} listings, total={total}")
+    return listings, total, resolved_params
+
+
+def fetch_detail(path: str) -> dict | None:
+    """Fetch listing detail page and extract GPS, full description."""
+    try:
+        url = f"{BASE_URL}{path}"
+        html = fetch_url(url)
+
+        result = {}
+
+        # GPS from Google Maps link
+        gps_match = re.search(r'google\.com/maps[^"]*place/([\d.]+),([\d.]+)', html)
+        if gps_match:
+            result["lat"] = float(gps_match.group(1))
+            result["lon"] = float(gps_match.group(2))
+
+        # Full description — Bazoš uses unquoted class=popisdetail
+        desc_match = re.search(r'class=.?popisdetail.?[^>]*>(.*?)</div>', html, re.DOTALL)
+        if desc_match:
+            desc = re.sub(r'<[^>]+>', ' ', desc_match.group(1)).strip()
+            desc = re.sub(r'\s+', ' ', desc)
+            result["description"] = desc
+
+        # Location from detail (anchor pattern reconstructed — verify against live markup)
+        loc_match = re.search(r'Lokalita:\s*<a[^>]*>(.*?)</a>', html, re.DOTALL)
+        if loc_match:
+            loc = re.sub(r'<[^>]+>', ' ', loc_match.group(1)).strip()
+            loc = re.sub(r'\s+', ' ', loc)
+            result["detail_location"] = loc
+
+        return result
+
+    except Exception as e:
+        logger.warning(f"Detail fetch failed for {path}: {e}")
+        return None
+
+
+def load_cache(json_path: str = "byty_bazos.json") -> dict[int, dict]:
+ """Load previously scraped data as cache keyed by hash_id."""
+ path = Path(json_path)
+ if not path.exists():
+ return {}
+ try:
+ data = json.loads(path.read_text(encoding="utf-8"))
+ return {e["hash_id"]: e for e in data if "hash_id" in e}
+ except (json.JSONDecodeError, KeyError):
+ return {}
+
+
+def scrape(max_pages: int | None = None, max_properties: int | None = None):
+ _run_start = time.time()
+ _run_ts = datetime.now().isoformat(timespec="seconds")
+ cache = load_cache()
+ today = datetime.now().strftime("%Y-%m-%d")
+
+ logger.info("=" * 60)
+ logger.info("Stahuji inzeráty z Bazoš.cz")
+ logger.info(f"Cena: do {format_price(MAX_PRICE)}")
+ logger.info(f"Min. plocha: {MIN_AREA} m²")
+ logger.info(f"Patro: od {MIN_FLOOR}. NP")
+ logger.info(f"Region: Praha")
+ if cache:
+ logger.info(f"Cache: {len(cache)} bytů z minulého běhu")
+ if max_pages:
+ logger.info(f"Max. stran: {max_pages}")
+ if max_properties:
+ logger.info(f"Max. bytů: {max_properties}")
+ logger.info("=" * 60)
+
+ # Step 1: Fetch listing pages
+ logger.info("\nFáze 1: Stahování seznamu inzerátů...")
+ all_listings = {} # id -> listing dict (dedup)
+ page = 1
+ offset = 0
+ total = None
+ pagination_params = None # resolved numeric params from first page
+
+ while True:
+ if max_pages and page > max_pages:
+ logger.debug(f"Max pages limit reached: {max_pages}")
+ break
+
+ logger.info(f"Strana {page} (offset {offset}) ...")
+ listings, total_count, resolved = fetch_listing_page(offset, pagination_params)
+ if resolved and not pagination_params:
+ pagination_params = resolved
+ logger.debug(f"Resolved pagination params: {pagination_params}")
+
+ if total is None and total_count > 0:
+ total = total_count
+ total_pages = math.ceil(total / PER_PAGE)
+ logger.info(f"→ Celkem {total} inzerátů, ~{total_pages} stran")
+
+ if not listings:
+ logger.debug(f"No listings found on page {page}, stopping")
+ break
+
+ for lst in listings:
+ lid = lst["id"]
+ if lid not in all_listings:
+ all_listings[lid] = lst
+
+ page += 1
+ offset += PER_PAGE
+ if total and offset >= total:
+ break
+ time.sleep(0.5)
+
+ logger.info(f"\nStaženo: {len(all_listings)} unikátních inzerátů")
+
+ # Step 2: Pre-filter by disposition, price, area from listing data
+ pre_filtered = []
+ excluded_disp = 0
+ excluded_price = 0
+ excluded_area = 0
+ excluded_no_disp = 0
+
+ for lst in all_listings.values():
+ title_and_desc = f"{lst['title']} {lst['description']}"
+
+ # Parse disposition
+ disp = parse_disposition(title_and_desc)
+ if not disp:
+ excluded_no_disp += 1
+ logger.debug(f"Filter: id={lst['id']} - excluded (no disposition found in '{lst['title']}')")
+ continue
+ if disp not in WANTED_DISPOSITIONS:
+ excluded_disp += 1
+ logger.debug(f"Filter: id={lst['id']} - excluded (disposition {disp})")
+ continue
+
+ # Price
+ price = lst["price"]
+ if price <= 0 or price > MAX_PRICE:
+ excluded_price += 1
+ logger.debug(f"Filter: id={lst['id']} - excluded (price {price})")
+ continue
+
+ # Area (if parseable from listing)
+ area = parse_area(title_and_desc)
+ if area is not None and area < MIN_AREA:
+ excluded_area += 1
+ logger.debug(f"Filter: id={lst['id']} - excluded (area {area} m²)")
+ continue
+
+ lst["_disposition"] = disp
+ lst["_area"] = area
+ pre_filtered.append(lst)
+
+ logger.info(f"\nPo předfiltraci:")
+ logger.info(f" Vyloučeno (bez dispozice): {excluded_no_disp}")
+ logger.info(f" Vyloučeno (dispozice): {excluded_disp}")
+ logger.info(f" Vyloučeno (cena): {excluded_price}")
+ logger.info(f" Vyloučeno (plocha): {excluded_area}")
+ logger.info(f" Zbývá: {len(pre_filtered)}")
+
+ # Step 3: Fetch details (for GPS + full description)
+ logger.info(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
+ results = []
+ excluded_panel = 0
+ excluded_floor = 0
+ excluded_no_gps = 0
+ excluded_detail = 0
+ excluded_area_detail = 0
+ cache_hits = 0
+ properties_fetched = 0
+
+ for i, lst in enumerate(pre_filtered):
+ if max_properties and properties_fetched >= max_properties:
+ logger.debug(f"Max properties limit reached: {max_properties}")
+ break
+
+ listing_id = lst["id"]
+ price = lst["price"]
+
+ # Check cache
+ cached = cache.get(listing_id)
+ if cached and cached.get("price") == price:
+ cache_hits += 1
+ logger.debug(f"Cache hit for id={listing_id}")
+ results.append(cached)
+ continue
+
+ time.sleep(0.4)
+ detail = fetch_detail(lst["detail_path"])
+
+ if not detail:
+ excluded_detail += 1
+ logger.debug(f"Filter: id={listing_id} - excluded (detail fetch failed)")
+ continue
+
+ # GPS required
+ lat = detail.get("lat")
+ lon = detail.get("lon")
+ if not lat or not lon:
+ excluded_no_gps += 1
+ logger.debug(f"Filter: id={listing_id} - excluded (no GPS)")
+ continue
+
+ # Full text for filtering
+ full_desc = detail.get("description", "")
+ full_text = f"{lst['title']} {lst['description']} {full_desc}"
+
+ # Panel check
+ if is_panel(full_text):
+ excluded_panel += 1
+ logger.info(f"✗ Vyloučen #{listing_id}: panelová stavba")
+ continue
+
+ # Sídliště check
+ if is_sidliste(full_text):
+ excluded_panel += 1
+ logger.info(f"✗ Vyloučen #{listing_id}: sídliště")
+ continue
+
+ # Floor
+ floor = parse_floor(full_text)
+ if floor is not None and floor < MIN_FLOOR:
+ excluded_floor += 1
+ logger.debug(f"Filter: id={listing_id} - excluded (floor {floor})")
+ continue
+
+ # Area — re-check from detail if not found before
+ area = lst.get("_area") or parse_area(full_desc)
+ if area is not None and area < MIN_AREA:
+ excluded_area_detail += 1
+ logger.debug(f"Filter: id={listing_id} - excluded (area {area} m² from detail)")
+ continue
+
+ disp = lst["_disposition"]
+ locality = detail.get("detail_location") or lst["location"]
+
+ result = {
+ "hash_id": listing_id,
+ "name": f"Prodej bytu {disp} {int(area) if area else '?'} m²",
+ "price": price,
+ "price_formatted": format_price(price),
+ "locality": locality,
+ "lat": lat,
+ "lon": lon,
+ "disposition": disp,
+ "floor": floor,
+ "area": area,
+ "building_type": "neuvedeno",
+ "ownership": "neuvedeno",
+ "url": f"{BASE_URL}{lst['detail_path']}",
+ "source": "bazos",
+ "image": lst.get("image", ""),
+ "scraped_at": today,
+ "first_seen": cached.get("first_seen", today) if cached else today,
+ "last_changed": today if not cached or cached.get("price") != price else cached.get("last_changed", today),
+ }
+
+ if not validate_listing(result, "bazos"):
+ continue
+
+ results.append(result)
+ properties_fetched += 1
+
+ if (i + 1) % 20 == 0:
+ logger.info(f"Zpracováno {i + 1}/{len(pre_filtered)} ...")
+
+ logger.info(f"\n{'=' * 60}")
+ logger.info(f"Výsledky Bazoš:")
+ logger.info(f" Předfiltrováno: {len(pre_filtered)}")
+ logger.info(f" Z cache (přeskočeno): {cache_hits}")
+ logger.info(f" Vyloučeno (panel/síd): {excluded_panel}")
+ logger.info(f" Vyloučeno (patro): {excluded_floor}")
+ logger.info(f" Vyloučeno (bez GPS): {excluded_no_gps}")
+ logger.info(f" Vyloučeno (bez detailu): {excluded_detail}")
+ logger.info(f" Vyloučeno (plocha det): {excluded_area_detail}")
+ logger.info(f" ✓ Vyhovující byty: {len(results)}")
+ logger.info(f"{'=' * 60}")
+
+ write_stats(STATS_FILE, {
+ "source": "Bazoš",
+ "timestamp": _run_ts,
+ "duration_sec": round(time.time() - _run_start, 1),
+ "success": True,
+ "accepted": len(results),
+ "fetched": len(all_listings),
+ "pages": page - 1,
+ "cache_hits": cache_hits,
+ "excluded": {
+ "bez dispozice": excluded_no_disp,
+ "dispozice": excluded_disp,
+ "cena": excluded_price,
+ "plocha": excluded_area + excluded_area_detail,
+ "bez GPS": excluded_no_gps,
+ "panel/síd": excluded_panel,
+ "patro": excluded_floor,
+ "bez detailu": excluded_detail,
+ },
+ })
+ return results
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Scrape apartments from Bazoš.cz")
+ parser.add_argument("--max-pages", type=int, default=None,
+ help="Maximum number of listing pages to scrape")
+ parser.add_argument("--max-properties", type=int, default=None,
+ help="Maximum number of properties to fetch details for")
+ parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
+ help="Logging level (default: INFO)")
+ args = parser.parse_args()
+
+ logging.basicConfig(
+ level=getattr(logging, args.log_level),
+ format="[%(levelname)s] %(asctime)s - %(name)s - %(message)s",
+ handlers=[logging.StreamHandler()]
+ )
+
+ _run_ts = datetime.now().isoformat(timespec="seconds")
+ start = time.time()
+ try:
+ estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
+ except Exception as e:
+ logger.error(f"Scraper failed: {e}", exc_info=True)
+ write_stats(STATS_FILE, {
+ "source": "Bazoš",
+ "timestamp": _run_ts,
+ "duration_sec": round(time.time() - start, 1),
+ "success": False,
+ "accepted": 0,
+ "fetched": 0,
+ "error": str(e),
+ })
+ raise
+
+ if estates:
+ json_path = Path("byty_bazos.json")
+ json_path.write_text(
+ json.dumps(estates, ensure_ascii=False, indent=2),
+ encoding="utf-8",
+ )
+ elapsed = time.time() - start
+ logger.info(f"\n✓ Data uložena: {json_path.resolve()}")
+ logger.info(f"⏱ Celkový čas: {elapsed:.0f} s")
+ else:
+ logger.info("\nŽádné byty z Bazoše neodpovídají kritériím :(")