Upload files to "/"

v1 scrapery
2026-02-13 16:11:28 +00:00
parent 82d1f94104
commit 846d0bd9f2
5 changed files with 1760 additions and 0 deletions
--- a/scrape_bezrealitky.py
+++ b/scrape_bezrealitky.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+"""
+Bezrealitky.cz scraper.
+Stáhne byty na prodej v Praze a vyfiltruje podle kritérií.
+Výstup: byty_bezrealitky.json
+"""
+from __future__ import annotations
+
+import json
+import math
+import re
+import time
+import urllib.request
+from pathlib import Path
+
+# ── Configuration ───────────────────────────────────────────────────────────
+
+# Filter thresholds applied by scrape().
+MAX_PRICE = 13_500_000  # upper price limit; printed with a "Kč" suffix
+MIN_AREA = 69  # minimum surface, reported to the user in m²
+MIN_FLOOR = 2  # lowest accepted floor, reported to the user as "NP"
+PER_PAGE = 15  # Bezrealitky returns 15 listings per page
+
+# Dispositions we want to keep
+WANTED_DISPOSITIONS = {
+    "DISP_3_KK", "DISP_3_1",
+    "DISP_4_KK", "DISP_4_1",
+    "DISP_5_KK", "DISP_5_1",
+    "DISP_6",
+    "DISP_OTHER",  # atypical layouts
+}
+
+# Disposition code -> human-readable label used in the output records.
+DISPOSITION_LABELS = {
+    "DISP_1_KK": "1+kk", "DISP_1_1": "1+1",
+    "DISP_2_KK": "2+kk", "DISP_2_1": "2+1",
+    "DISP_3_KK": "3+kk", "DISP_3_1": "3+1",
+    "DISP_4_KK": "4+kk", "DISP_4_1": "4+1",
+    "DISP_5_KK": "5+kk", "DISP_5_1": "5+1",
+    "DISP_6": "6+",
+    "DISP_OTHER": "Atypický",
+}
+
+# Construction code -> Czech label written to the "building_type" output field.
+CONSTRUCTION_MAP = {
+    "BRICK": "Cihlová",
+    "PANEL": "Panelová",
+    "WOOD": "Dřevostavba",
+    "MIXED": "Smíšená",
+    "MONTAGE": "Montovaná",
+    "STEEL": "Ocelová",
+}
+
+# Ownership code -> Czech label written to the "ownership" output field.
+OWNERSHIP_MAP = {
+    "OSOBNI": "Osobní",
+    "DRUZSTEVNI": "Družstevní",
+    "STATNI": "Státní/obecní",
+}
+
+# Request headers sent with every HTTP request (browser-like User-Agent).
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    "Accept": "text/html,application/xhtml+xml",
+    "Accept-Language": "cs,en;q=0.9",
+}
+
+# Site root; listing and detail URLs are built on top of it.
+BASE_URL = "https://www.bezrealitky.cz"
+
+
+def fetch_page(page: int) -> tuple[list[dict], int]:
+    """
+    Fetch one listing page from Bezrealitky.
+
+    The page embeds its data as JSON in a ``__NEXT_DATA__`` script tag; the
+    adverts are read from the Apollo cache stored under
+    ``props.pageProps.apolloCache``.
+
+    Args:
+        page: Page number passed as the ``page`` query parameter.
+
+    Returns:
+        ``(adverts, total)`` where ``adverts`` is the list of ``Advert`` dicts
+        found in the cache and ``total`` is the largest ``totalCount`` reported
+        by any ``listAdverts`` entry of ``ROOT_QUERY`` (0 if none). Returns
+        ``([], 0)`` when the page has no ``__NEXT_DATA__`` payload.
+
+    Raises:
+        urllib.error.URLError: On network/HTTP failure.
+        json.JSONDecodeError, KeyError: If the embedded payload is malformed.
+    """
+    url = f"{BASE_URL}/vypis/nabidka-prodej/byt/praha?page={page}"
+    req = urllib.request.Request(url, headers=HEADERS)
+    # The context manager closes the HTTP response even if reading or decoding fails.
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        html = resp.read().decode("utf-8")
+
+    match = re.search(
+        r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+        html, re.DOTALL
+    )
+    if not match:
+        return [], 0
+
+    data = json.loads(match.group(1))
+    cache = data["props"]["pageProps"]["apolloCache"]
+
+    # Extract adverts from cache
+    adverts = [
+        val
+        for key, val in cache.items()
+        if key.startswith("Advert:") and isinstance(val, dict) and val.get("__typename") == "Advert"
+    ]
+
+    # Get total count from ROOT_QUERY; several listAdverts entries may exist,
+    # so keep the largest reported value.
+    total = 0
+    for key, val in cache.get("ROOT_QUERY", {}).items():
+        if "listAdverts" in key and isinstance(val, dict):
+            tc = val.get("totalCount")
+            if tc and tc > total:
+                total = tc
+
+    return adverts, total
+
+
+def fetch_detail(uri: str) -> dict | None:
+    """
+    Fetch the detail page of one listing and return its full advert record.
+
+    Args:
+        uri: Listing slug appended to ``/nemovitosti-byty-domy/``.
+
+    Returns:
+        The first ``Advert:*`` dict in the page's Apollo cache that carries a
+        ``construction``, ``etage`` or ``ownership`` key, or ``None`` when the
+        page has no ``__NEXT_DATA__`` payload, no such advert exists, or any
+        error occurs. Errors are printed as a warning and swallowed — the
+        lookup is deliberately best-effort.
+    """
+    try:
+        url = f"{BASE_URL}/nemovitosti-byty-domy/{uri}"
+        req = urllib.request.Request(url, headers=HEADERS)
+        # The context manager closes the HTTP response even if reading or decoding fails.
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            html = resp.read().decode("utf-8")
+
+        match = re.search(
+            r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+            html, re.DOTALL
+        )
+        if not match:
+            return None
+
+        data = json.loads(match.group(1))
+        cache = data["props"]["pageProps"]["apolloCache"]
+
+        # Find the full advert in cache
+        for key, val in cache.items():
+            if key.startswith("Advert:") and isinstance(val, dict):
+                # Only the detailed advert record carries these fields
+                if "construction" in val or "etage" in val or "ownership" in val:
+                    return val
+
+    except Exception as e:
+        print(f"    Warning: detail failed for {uri}: {e}")
+    return None
+
+
+def format_price(price: int) -> str:
+    """Return *price* with space-separated groups of three digits and a " Kč" suffix."""
+    digits = str(price)
+    # Cut three-character groups starting from the right-hand end.
+    groups = [digits[max(0, end - 3):end] for end in range(len(digits), 0, -3)]
+    groups.reverse()
+    return " ".join(groups) + " Kč"
+
+
+def load_cache(json_path: str = "byty_bezrealitky.json") -> dict[int, dict]:
+    """
+    Load previously scraped data as a cache keyed by ``hash_id``.
+
+    Args:
+        json_path: Path of the JSON file written by a previous run.
+
+    Returns:
+        Mapping ``hash_id -> record`` for every dict entry that has a
+        ``hash_id`` key. An empty dict is returned when the file is missing,
+        is not valid UTF-8 JSON, or does not contain a JSON list — the cache
+        is an optimisation only, so a bad file must never abort the run.
+    """
+    path = Path(json_path)
+    if not path.exists():
+        return {}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except ValueError:
+        # Covers json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    if not isinstance(data, list):
+        return {}
+    # Skip non-dict items instead of failing on them with a TypeError.
+    return {e["hash_id"]: e for e in data if isinstance(e, dict) and "hash_id" in e}
+
+
+def scrape():
+    """
+    Run the whole Bezrealitky scrape and return the matching flats.
+
+    Steps:
+      1. Download listing pages until a page yields no adverts or the page
+         count derived from the reported total is exceeded; adverts are
+         de-duplicated by their ``id``.
+      2. Pre-filter on disposition, price, surface and presence of GPS using
+         the list data only.
+      3. For each remaining advert either reuse the cached record from the
+         previous run (same id and unchanged price) or download the detail
+         page and filter on construction, situation and floor.
+
+    Progress and summary statistics are printed to stdout.
+
+    Returns:
+        List of result dicts (one per accepted flat). Cached records are
+        appended as stored, without re-applying the detail filters.
+    """
+    cache = load_cache()
+
+    print("=" * 60)
+    print("Stahuji inzeráty z Bezrealitky.cz")
+    print(f"Cena: do {format_price(MAX_PRICE)}")
+    print(f"Min. plocha: {MIN_AREA} m²")
+    print(f"Patro: od {MIN_FLOOR}. NP")
+    print(f"Region: Praha")
+    if cache:
+        print(f"Cache: {len(cache)} bytů z minulého běhu")
+    print("=" * 60)
+
+    # Step 1: Fetch all listing pages
+    print("\nFáze 1: Stahování seznamu inzerátů...")
+    all_adverts = {}  # id -> advert dict (dedup)
+    page = 1
+    total = None
+
+    while True:
+        print(f"  Strana {page} ...")
+        adverts, total_count = fetch_page(page)
+
+        # Remember the first non-zero total the site reports.
+        if total is None and total_count > 0:
+            total = total_count
+            total_pages = math.ceil(total / PER_PAGE)
+            print(f"  → Celkem {total} inzerátů, ~{total_pages} stran")
+
+        # An empty page ends pagination even when no total is known.
+        if not adverts:
+            break
+
+        for adv in adverts:
+            adv_id = adv.get("id")
+            if adv_id and adv_id not in all_adverts:
+                all_adverts[adv_id] = adv
+
+        page += 1
+        if total and page > math.ceil(total / PER_PAGE):
+            break
+        time.sleep(0.5)  # pause between listing requests
+
+    print(f"\n  Staženo: {len(all_adverts)} unikátních inzerátů")
+
+    # Step 2: Pre-filter by disposition, price, area from list data
+    pre_filtered = []
+    excluded_disp = 0
+    excluded_price = 0
+    excluded_area = 0
+    excluded_no_gps = 0
+
+    for adv in all_adverts.values():
+        disp = adv.get("disposition", "")
+        if disp not in WANTED_DISPOSITIONS:
+            excluded_disp += 1
+            continue
+
+        # A missing/zero price is treated as "no price" and excluded as well.
+        price = adv.get("price", 0) or 0
+        if price > MAX_PRICE or price == 0:
+            excluded_price += 1
+            continue
+
+        # Adverts without a surface value pass this check.
+        surface = adv.get("surface")
+        if surface is not None and surface < MIN_AREA:
+            excluded_area += 1
+            continue
+
+        gps = adv.get("gps", {})
+        if not gps or not gps.get("lat") or not gps.get("lng"):
+            excluded_no_gps += 1
+            continue
+
+        pre_filtered.append(adv)
+
+    print(f"\nPo předfiltraci:")
+    print(f"  Vyloučeno (dispozice): {excluded_disp}")
+    print(f"  Vyloučeno (cena):      {excluded_price}")
+    print(f"  Vyloučeno (plocha):    {excluded_area}")
+    print(f"  Vyloučeno (bez GPS):   {excluded_no_gps}")
+    print(f"  Zbývá:                 {len(pre_filtered)}")
+
+    # Step 3: Fetch details
+    print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
+    results = []
+    excluded_panel = 0  # counts both panel buildings and housing estates
+    excluded_floor = 0
+    excluded_detail = 0
+    cache_hits = 0
+
+    for i, adv in enumerate(pre_filtered):
+        uri = adv.get("uri", "")
+        if not uri:
+            excluded_detail += 1
+            continue
+
+        # Check cache — if hash_id exists and price unchanged, reuse
+        adv_id = int(adv["id"])
+        adv_price = adv.get("price", 0) or 0
+        cached = cache.get(adv_id)
+        if cached and cached.get("price") == adv_price:
+            cache_hits += 1
+            results.append(cached)
+            continue
+
+        time.sleep(0.4)  # pause before each detail request
+        detail = fetch_detail(uri)
+
+        if not detail:
+            excluded_detail += 1
+            continue
+
+        # Check construction — exclude panel
+        construction = detail.get("construction", "")
+        if construction == "PANEL":
+            excluded_panel += 1
+            print(f"  ✗ Vyloučen #{adv['id']}: panel")
+            continue
+
+        # Check situation — exclude housing estates
+        situation = detail.get("situation", "")
+        if situation and "HOUSING_ESTATE" in str(situation).upper():
+            excluded_panel += 1
+            print(f"  ✗ Vyloučen #{adv['id']}: sídliště")
+            continue
+
+        # Check floor (etage); adverts without a floor value pass
+        etage = detail.get("etage")
+        if etage is not None and etage < MIN_FLOOR:
+            excluded_floor += 1
+            continue
+
+        gps = adv.get("gps", {})
+        disp = adv.get("disposition", "")
+
+        # Get address — key includes locale parameter
+        # Prefer an address key without "withHouseNumber", then any address
+        # key, then the address from the list data, finally plain "Praha".
+        address = ""
+        for key in detail:
+            if key.startswith("address(") and "withHouseNumber" not in key:
+                address = detail[key]
+                break
+        if not address:
+            for key in detail:
+                if key.startswith("address("):
+                    address = detail[key]
+                    break
+        if not address:
+            address = adv.get('address({"locale":"CS"})', "Praha")
+
+        result = {
+            "hash_id": int(adv["id"]),
+            "name": f"Prodej bytu {DISPOSITION_LABELS.get(disp, '?')} {adv.get('surface', '?')} m²",
+            "price": adv.get("price", 0),
+            "price_formatted": format_price(adv.get("price", 0)),
+            "locality": address,
+            "lat": gps["lat"],
+            "lon": gps["lng"],
+            "disposition": DISPOSITION_LABELS.get(disp, "?"),
+            "floor": etage,
+            "area": adv.get("surface"),
+            "building_type": CONSTRUCTION_MAP.get(construction, construction or "neuvedeno"),
+            "ownership": OWNERSHIP_MAP.get(detail.get("ownership", ""), detail.get("ownership") or "neuvedeno"),
+            "url": f"{BASE_URL}/nemovitosti-byty-domy/{uri}",
+            "source": "bezrealitky",
+            "image": "",
+        }
+        results.append(result)
+
+        # Progress line; only reached for iterations that produced a new result.
+        if (i + 1) % 20 == 0:
+            print(f"  Zpracováno {i + 1}/{len(pre_filtered)} ...")
+
+    print(f"\n{'=' * 60}")
+    print(f"Výsledky Bezrealitky:")
+    print(f"  Předfiltrováno:        {len(pre_filtered)}")
+    print(f"  Z cache (přeskočeno): {cache_hits}")
+    print(f"  Vyloučeno (panel/síd): {excluded_panel}")
+    print(f"  Vyloučeno (patro):     {excluded_floor}")
+    print(f"  Vyloučeno (bez detailu): {excluded_detail}")
+    print(f"  ✓ Vyhovující byty:    {len(results)}")
+    print(f"{'=' * 60}")
+
+    return results
+
+
+if __name__ == "__main__":
+    # Script entry point: scrape, then persist the results (if any) as JSON.
+    start = time.time()
+    estates = scrape()
+
+    if not estates:
+        print("\nŽádné byty z Bezrealitek neodpovídají kritériím :(")
+    else:
+        json_path = Path("byty_bezrealitky.json")
+        payload = json.dumps(estates, ensure_ascii=False, indent=2)
+        json_path.write_text(payload, encoding="utf-8")
+        elapsed = time.time() - start
+        print(f"\n✓ Data uložena: {json_path.resolve()}")
+        print(f"⏱  Celkový čas: {elapsed:.0f} s")
--- a/scrape_cityhome.py
+++ b/scrape_cityhome.py
@@ -0,0 +1,328 @@
+#!/usr/bin/env python3
+"""
+CityHome (city-home.cz) scraper.
+Stáhne byty na prodej v Praze z projektů CityHome/SATPO.
+Výstup: byty_cityhome.json
+"""
+from __future__ import annotations
+
+import json
+import re
+import time
+import urllib.request
+from pathlib import Path
+
+# ── Configuration ───────────────────────────────────────────────────────────
+
+# Filter thresholds applied by scrape().
+MAX_PRICE = 14_000_000  # upper price limit; printed with a "Kč" suffix
+MIN_AREA = 69  # minimum area, reported to the user in m²
+MIN_FLOOR = 2  # lowest accepted floor, reported to the user as "NP"
+
+# Dispositions to keep, compared against the row's data-dispozition attribute.
+WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1"}
+
+# Request headers sent with every HTTP request (browser-like User-Agent).
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    "Accept": "text/html,application/xhtml+xml",
+    "Accept-Language": "cs,en;q=0.9",
+}
+
+# Site root; relative detail links and locality pages are built on top of it.
+BASE_URL = "https://www.city-home.cz"
+
+
+def fetch_url(url: str) -> str:
+    """
+    Fetch *url* and return the response body decoded as UTF-8.
+
+    Up to three attempts are made; after a failed first or second attempt the
+    function waits 2 s or 4 s respectively before retrying.
+
+    Raises:
+        ConnectionError, urllib.error.URLError: Re-raised when the third
+            attempt fails as well.
+    """
+    for attempt in range(3):
+        try:
+            req = urllib.request.Request(url, headers=HEADERS)
+            # The context manager closes the HTTP response even if reading or decoding fails.
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                return resp.read().decode("utf-8")
+        except (ConnectionResetError, ConnectionError, urllib.error.URLError) as e:
+            if attempt < 2:
+                # Report the failure before waiting so the log shows why we pause.
+                print(f"    Retry {attempt + 1}: {e}")
+                time.sleep((attempt + 1) * 2)
+            else:
+                raise
+
+
+def format_price(price: int) -> str:
+    """Return *price* with space-separated groups of three digits and a " Kč" suffix."""
+    digits = str(price)
+    # Cut three-character groups starting from the right-hand end.
+    groups = [digits[max(0, end - 3):end] for end in range(len(digits), 0, -3)]
+    groups.reverse()
+    return " ".join(groups) + " Kč"
+
+
+def parse_filter_page(html: str) -> list[dict]:
+    """
+    Parse all listing rows from the filter page.
+
+    Every ``<tr>`` carrying a ``data-cena`` attribute is treated as one unit.
+    The ``data-*`` attributes are read independently of their order.
+
+    Args:
+        html: HTML of the filter page.
+
+    Returns:
+        One dict per row with the keys ``price``, ``area``, ``unittype``,
+        ``free``, ``project_id``, ``transaction``, ``disposition``,
+        ``location``, ``url``, ``unit_name``, ``floor`` and ``project_name``.
+        ``floor`` is ``None`` when no "N.NP"/"N.PP" cell is found; underground
+        floors ("PP") are negative. Rows whose ``data-cena`` is not purely
+        numeric are skipped.
+    """
+    listings = []
+
+    # Find each <tr> tag together with its full attribute string and content.
+    for match in re.finditer(r'<tr\s+([^>]*data-cena="[^"]*"[^>]*)>(.*?)</tr>', html, re.DOTALL):
+        attrs_str = match.group(1)
+        row_content = match.group(2)
+
+        cena = re.search(r'data-cena="(\d+)"', attrs_str)
+        if not cena:
+            continue
+
+        # Remaining data attributes; each one is optional.
+        plocha = re.search(r'data-plocha="([\d.]+)"', attrs_str)
+        unittype = re.search(r'data-unittype="(\d+)"', attrs_str)
+        free = re.search(r'data-free="(yes|no)"', attrs_str)
+        project = re.search(r'data-project="(\d+)"', attrs_str)
+        transaction = re.search(r'data-transaction="([^"]*)"', attrs_str)
+        dispozition = re.search(r'data-dispozition="([^"]*)"', attrs_str)
+        location = re.search(r'data-location="([^"]*)"', attrs_str)
+
+        # Extract detail URL and unit name from the first link in the row
+        link_match = re.search(r'<a[^>]*href="([^"]*)"[^>]*>(.*?)</a>', row_content, re.DOTALL)
+        detail_url = link_match.group(1).strip() if link_match else ""
+        unit_name = re.sub(r'<[^>]+>', '', link_match.group(2)).strip() if link_match else ""
+
+        if detail_url and not detail_url.startswith("http"):
+            detail_url = BASE_URL + detail_url
+
+        # Plain text of every cell, computed once for both scans below.
+        cells = re.findall(r'<td[^>]*>(.*?)</td>', row_content, re.DOTALL)
+        cell_texts = [re.sub(r'<[^>]+>', '', cell).strip() for cell in cells]
+
+        # Extract floor from cells — look for pattern like "3.NP" or "2.PP";
+        # the last matching cell wins.
+        floor = None
+        for cell_text in cell_texts:
+            np_match = re.search(r'(\d+)\.\s*NP', cell_text)
+            pp_match = re.search(r'(\d+)\.\s*PP', cell_text)
+            if np_match:
+                floor = int(np_match.group(1))
+            elif pp_match:
+                floor = -int(pp_match.group(1))  # Underground
+
+        # Extract project name — first cell that's not a number/price/floor
+        # and differs from the unit name.
+        project_name = ""
+        for cell_text in cell_texts:
+            if cell_text and not re.match(r'^[\d\s.,]+$', cell_text) and "NP" not in cell_text and "PP" not in cell_text and "m²" not in cell_text and "Kč" not in cell_text and "EUR" not in cell_text and "CZK" not in cell_text:
+                if len(cell_text) > 3 and cell_text != unit_name:
+                    project_name = cell_text
+                    break
+
+        listings.append({
+            "price": int(cena.group(1)),
+            "area": float(plocha.group(1)) if plocha else 0,
+            "unittype": int(unittype.group(1)) if unittype else 0,
+            "free": free.group(1) if free else "no",
+            "project_id": project.group(1) if project else "",
+            "transaction": transaction.group(1) if transaction else "",
+            "disposition": dispozition.group(1) if dispozition else "",
+            "location": location.group(1) if location else "",
+            "url": detail_url,
+            "unit_name": unit_name,
+            "floor": floor,
+            "project_name": project_name,
+        })
+
+    return listings
+
+
+def extract_project_gps(html: str) -> dict[str, tuple[float, float]]:
+    """Map project names to (lat, lon) pairs found in a locality page's JavaScript."""
+    # Entries in the JS look like: ['<h4>Project Name</h4>...', 'LAT', 'LON', '1', 'Name']
+    entries = re.finditer(r"\['[^']*<h4>([^<]+)</h4>[^']*',\s*'([\d.]+)',\s*'([\d.]+)'", html)
+    return {
+        entry.group(1).strip(): (float(entry.group(2)), float(entry.group(3)))
+        for entry in entries
+    }
+
+
+def scrape():
+    """
+    Run the whole CityHome scrape and return the matching flats.
+
+    Steps:
+      1. Download the filter page and parse every unit row.
+      2. Collect project slugs from the units' detail URLs and download each
+         project's locality page to obtain its GPS coordinates.
+      3. Filter units on availability, unit type, transaction, disposition,
+         price, area, floor and availability of project GPS.
+
+    Progress and summary statistics are printed to stdout.
+
+    Returns:
+        List of result dicts (one per accepted flat). Coordinates are those of
+        the project, not of the individual unit.
+    """
+    print("=" * 60)
+    print("Stahuji inzeráty z CityHome (city-home.cz)")
+    print(f"Cena: do {format_price(MAX_PRICE)}")
+    print(f"Min. plocha: {MIN_AREA} m²")
+    print(f"Patro: od {MIN_FLOOR}. NP")
+    print("=" * 60)
+
+    # Step 1: Fetch the main filter page
+    print("\nFáze 1: Stahování seznamu bytů...")
+    html = fetch_url(f"{BASE_URL}/filtr-nemovitosti1")
+    all_listings = parse_filter_page(html)
+    print(f"  Nalezeno: {len(all_listings)} jednotek")
+
+    # Step 2: Collect unique project slugs from detail URLs to fetch GPS
+    print("\nFáze 2: Stahování GPS souřadnic projektů...")
+    project_slugs = set()
+    for listing in all_listings:
+        url = listing.get("url", "")
+        # Example path: /projekty/zateckych-14/nabidka-nemovitosti/byt-a31
+        slug_match = re.search(r'/(?:projekty|bytove-domy)/([^/]+)/', url)
+        if slug_match:
+            project_slugs.add(slug_match.group(1))
+
+    # Fetch GPS for each project from locality pages
+    # NOTE(review): slugs found under /bytove-domy/ are also requested under
+    # /projekty/<slug>/lokalita — confirm that URL exists for them.
+    project_gps = {}
+    for slug in sorted(project_slugs):
+        time.sleep(0.5)  # pause between locality requests
+        try:
+            locality_url = f"{BASE_URL}/projekty/{slug}/lokalita"
+            loc_html = fetch_url(locality_url)
+            gps = extract_project_gps(loc_html)
+            if gps:
+                # Take first entry (presumably the project itself — TODO confirm)
+                first_name, (lat, lon) = next(iter(gps.items()))
+                project_gps[slug] = (lat, lon)
+                print(f"  ✓ {slug}: {lat}, {lon}")
+            else:
+                print(f"  ✗ {slug}: GPS nenalezeno")
+        except Exception as e:
+            # Best-effort: a failing project only loses its GPS, the run continues.
+            print(f"  ✗ {slug}: chyba ({e})")
+
+    # Step 3: Filter listings
+    print(f"\nFáze 3: Filtrování...")
+    results = []
+    excluded_sold = 0
+    excluded_type = 0  # counts both non-apartment units and non-sale transactions
+    excluded_disp = 0
+    excluded_price = 0
+    excluded_area = 0
+    excluded_floor = 0
+    excluded_no_gps = 0
+
+    for listing in all_listings:
+        # Only available units
+        if listing["free"] != "yes":
+            excluded_sold += 1
+            continue
+
+        # Only apartments (unittype=2)
+        if listing["unittype"] != 2:
+            excluded_type += 1
+            continue
+
+        # Only sales
+        if listing["transaction"] != "prodej":
+            excluded_type += 1
+            continue
+
+        # Disposition
+        disp = listing["disposition"]
+        if disp not in WANTED_DISPOSITIONS:
+            excluded_disp += 1
+            continue
+
+        # Price
+        price = listing["price"]
+        if price <= 0 or price > MAX_PRICE:
+            excluded_price += 1
+            continue
+
+        # Area
+        area = listing["area"]
+        if area < MIN_AREA:
+            excluded_area += 1
+            continue
+
+        # Floor; units without a parsed floor pass this check
+        floor = listing["floor"]
+        if floor is not None and floor < MIN_FLOOR:
+            excluded_floor += 1
+            continue
+
+        # GPS from project
+        url = listing.get("url", "")
+        slug_match = re.search(r'/(?:projekty|bytove-domy)/([^/]+)/', url)
+        slug = slug_match.group(1) if slug_match else ""
+        gps = project_gps.get(slug)
+
+        if not gps:
+            excluded_no_gps += 1
+            continue
+
+        lat, lon = gps
+
+        result = {
+            "hash_id": f"cityhome_{slug}_{listing['unit_name']}",
+            "name": f"Prodej bytu {disp} {area} m² — {listing['project_name']}",
+            "price": price,
+            "price_formatted": format_price(price),
+            "locality": f"{listing['project_name']}, Praha",
+            "lat": lat,
+            "lon": lon,
+            "disposition": disp,
+            "floor": floor,
+            "area": area,
+            "building_type": "Cihlová",  # hard-coded: CityHome renovates brick buildings
+            "ownership": "neuvedeno",
+            "url": url,
+            "source": "cityhome",
+            "image": "",
+        }
+        results.append(result)
+
+    print(f"\n{'=' * 60}")
+    print(f"Výsledky CityHome:")
+    print(f"  Celkem jednotek:       {len(all_listings)}")
+    print(f"  Vyloučeno (prodáno):   {excluded_sold}")
+    print(f"  Vyloučeno (typ):       {excluded_type}")
+    print(f"  Vyloučeno (dispozice): {excluded_disp}")
+    print(f"  Vyloučeno (cena):      {excluded_price}")
+    print(f"  Vyloučeno (plocha):    {excluded_area}")
+    print(f"  Vyloučeno (patro):     {excluded_floor}")
+    print(f"  Vyloučeno (bez GPS):   {excluded_no_gps}")
+    print(f"  ✓ Vyhovující byty:    {len(results)}")
+    print(f"{'=' * 60}")
+
+    return results
+
+
+if __name__ == "__main__":
+    # Script entry point: scrape, then persist the results (if any) as JSON.
+    start = time.time()
+    estates = scrape()
+
+    if not estates:
+        print("\nŽádné byty z CityHome neodpovídají kritériím :(")
+    else:
+        json_path = Path("byty_cityhome.json")
+        payload = json.dumps(estates, ensure_ascii=False, indent=2)
+        json_path.write_text(payload, encoding="utf-8")
+        elapsed = time.time() - start
+        print(f"\n✓ Data uložena: {json_path.resolve()}")
+        print(f"⏱  Celkový čas: {elapsed:.0f} s")
--- a/scrape_idnes.py
+++ b/scrape_idnes.py
@@ -0,0 +1,464 @@
+#!/usr/bin/env python3
+"""
+Reality iDNES scraper.
+Stáhne byty na prodej v Praze a vyfiltruje podle kritérií.
+Výstup: byty_idnes.json
+"""
+from __future__ import annotations
+
+import json
+import math
+import re
+import time
+import urllib.request
+import urllib.parse
+from html.parser import HTMLParser
+from pathlib import Path
+
+# ── Configuration ───────────────────────────────────────────────────────────
+
+# Filter thresholds; MAX_PRICE and MIN_AREA are also sent to the site in the
+# listing URL built by build_list_url().
+MAX_PRICE = 13_500_000  # upper price limit; printed with a "Kč" suffix
+MIN_AREA = 69  # minimum usable area, reported to the user in m²
+MIN_FLOOR = 2  # lowest accepted floor, reported to the user as "NP"
+PER_PAGE = 26  # iDNES returns 26 listings per page
+
+# Dispositions — codes for the s-qc[subtypeFlat] query parameter
+DISPOSITION_CODES = "3k|31|4k|41|5k|51|6k"
+
+# Maps a disposition parsed from the listing title to its output label
+DISPOSITION_MAP = {
+    "3+kk": "3+kk", "3+1": "3+1",
+    "4+kk": "4+kk", "4+1": "4+1",
+    "5+kk": "5+kk", "5+1": "5+1",
+    "6+kk": "6+", "6+1": "6+",
+    "6 a více": "6+",
+}
+
+# Request headers sent with every HTTP request (browser-like User-Agent).
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "cs,en;q=0.9",
+    "Accept-Encoding": "identity",
+    "Connection": "keep-alive",
+}
+
+# Site root; listing URLs and relative detail links are built on top of it.
+BASE_URL = "https://reality.idnes.cz"
+
+# Number of attempts fetch_url() makes before giving up.
+MAX_RETRIES = 5
+
+
+def fetch_url(url: str) -> str:
+    """
+    Fetch *url* and return the response body decoded as UTF-8.
+
+    Up to ``MAX_RETRIES`` attempts are made, waiting 3 s longer after every
+    failed attempt (3, 6, 9, ... seconds).
+
+    Raises:
+        OSError: (including ``urllib.error.URLError`` and connection errors)
+            re-raised when the last attempt fails as well.
+    """
+    for attempt in range(MAX_RETRIES):
+        try:
+            req = urllib.request.Request(url, headers=HEADERS)
+            # The context manager closes the HTTP response even if reading or decoding fails.
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                data = resp.read()
+            return data.decode("utf-8")
+        except (ConnectionResetError, ConnectionError, urllib.error.URLError,
+                OSError) as e:
+            if attempt < MAX_RETRIES - 1:
+                wait = (attempt + 1) * 3  # 3, 6, 9, 12s
+                print(f"    Retry {attempt + 1}/{MAX_RETRIES} (wait {wait}s): {e}")
+                time.sleep(wait)
+            else:
+                raise
+
+
+def build_list_url(page: int = 0) -> str:
+    """Return the filtered listing URL; ``page`` is appended only when greater than 0."""
+    query = urllib.parse.urlencode({
+        "s-qc[subtypeFlat]": DISPOSITION_CODES,
+        "s-qc[usableAreaMin]": str(MIN_AREA),
+    })
+    listing_url = f"{BASE_URL}/s/prodej/byty/cena-do-{MAX_PRICE}/praha/" + f"?{query}"
+    # The first page (0) is addressed without a page parameter.
+    return listing_url + f"&page={page}" if page > 0 else listing_url
+
+
+def parse_total_count(html: str) -> int:
+    """
+    Extract the total listing count from a listing page.
+
+    Looks for the first number directly followed by the word stem "inzerát"
+    (e.g. "720 inzerátů"). Digit groups may be separated by any whitespace
+    (space, no-break space, narrow no-break space, ...).
+
+    Returns:
+        The parsed count, or 0 when no such text is found.
+    """
+    match = re.search(r'(\d[\d\s]*)\s*inzerát', html)
+    if match:
+        # The pattern admits every whitespace character inside the number, so
+        # strip all of them — not only " " and "\xa0" — before converting.
+        return int(re.sub(r'\s+', '', match.group(1)))
+    return 0
+
+
+def parse_listings(html: str) -> list[dict]:
+    """
+    Parse listing cards from a listing page using regular expressions.
+
+    Every ``c-products__link`` anchor pointing to a ``/detail/`` URL is taken
+    as one listing; title, price and address are read from the markup that
+    follows the link. Links preceded (within 500 characters) by an
+    advertisement marker are skipped.
+
+    Args:
+        html: HTML of one listing page.
+
+    Returns:
+        One dict per unique detail URL with the keys ``id``, ``url``,
+        ``disposition``, ``area``, ``price`` and ``locality``. ``price`` is 0
+        when missing or "on request"; ``area`` is ``None`` when not present in
+        the title; ``id`` falls back to the URL when no 24-character hex id is
+        found in it.
+    """
+    results = []
+
+    # The anchor's attributes may come in either order, so match both.
+    link_pattern = re.compile(
+        r'<a[^>]*href="([^"]*?/detail/[^"]*?)"[^>]*class="c-products__link"[^>]*>',
+        re.DOTALL
+    )
+    link_pattern2 = re.compile(
+        r'<a[^>]*class="c-products__link"[^>]*href="([^"]*?/detail/[^"]*?)"[^>]*>',
+        re.DOTALL
+    )
+
+    # Find all c-products__link anchors
+    all_links = link_pattern.findall(html) + link_pattern2.findall(html)
+    seen_urls = set()
+
+    # For each link, find the surrounding product block
+    for link_url in all_links:
+        if link_url in seen_urls:
+            continue
+        seen_urls.add(link_url)
+
+        # Find context after this link (the product card)
+        escaped_url = re.escape(link_url)
+        context_match = re.search(
+            escaped_url + r'(.*?)</div>\s*</div>',
+            html, re.DOTALL
+        )
+        if not context_match:
+            continue
+
+        block = context_match.group(1)
+
+        # Ensure full URL
+        url = link_url
+        if not url.startswith("http"):
+            url = BASE_URL + url
+
+        # Skip ads (both spellings of the marker are checked)
+        ad_check_start = max(0, context_match.start() - 500)
+        ad_block = html[ad_check_start:context_match.start()]
+        if "advertisment" in ad_block or "advertisement" in ad_block:
+            continue
+
+        # Parse title: <h2 class="c-products__title">prodej bytu 3+kk 79 m2</h2>
+        title_match = re.search(r'class="c-products__title"[^>]*>(.*?)</h2>', block, re.DOTALL)
+        title = re.sub(r'<[^>]+>', '', title_match.group(1)).strip().lower() if title_match else ""
+
+        # Parse price: <p class="c-products__price"><strong>12 950 000 Kč</strong></p>
+        price_match = re.search(r'c-products__price[^>]*>.*?<strong>(.*?)</strong>', block, re.DOTALL)
+        price_text = re.sub(r'<[^>]+>', '', price_match.group(1)).strip() if price_match else ""
+
+        # Parse address: <p class="c-products__info">Klečkova, Praha 5 - Stodůlky</p>
+        info_match = re.search(r'class="c-products__info"[^>]*>(.*?)</p>', block, re.DOTALL)
+        info = re.sub(r'<[^>]+>', '', info_match.group(1)).strip() if info_match else ""
+
+        # Parse disposition and area from title
+        disp_match = re.search(r'(\d\+(?:kk|\d))', title)
+        area_match = re.search(r'(\d+)\s*m[²2]', title)
+
+        disposition = disp_match.group(1) if disp_match else None
+        area = int(area_match.group(1)) if area_match else None
+
+        if not disposition and ("6 a" in title or "6+" in title):
+            disposition = "6+"
+
+        # Parse price; "on request" prices stay 0
+        price = 0
+        if price_text and "vyžádání" not in price_text.lower():
+            price_clean = re.sub(r'[^\d]', '', price_text)
+            if price_clean:
+                price = int(price_clean)
+
+        # Extract listing ID from URL
+        id_match = re.search(r'/([a-f0-9]{24})/?', url)
+        listing_id = id_match.group(1) if id_match else url
+
+        results.append({
+            "id": listing_id,
+            "url": url,
+            "disposition": DISPOSITION_MAP.get(disposition, disposition or "?"),
+            "area": area,
+            "price": price,
+            "locality": info,
+        })
+
+    return results
+
+
+def parse_detail(html: str) -> dict:
+    """
+    Parse a detail page for GPS, floor, construction and ownership.
+
+    Returns a dict that contains only the keys that could be found:
+    ``lat``/``lon`` (floats from the ``dataLayer.push`` object), ``floor``
+    (int), ``construction`` (lower-cased text) and ``ownership`` (text).
+    """
+    info = {}
+
+    # 1. GPS coordinates from the dataLayer.push({...}) object
+    push = re.search(
+        r'dataLayer\.push\(\s*(\{[^}]+?"listing_lat"[^}]+?\})\s*\)',
+        html, re.DOTALL
+    )
+    if push:
+        payload = push.group(1)
+        coordinate_patterns = (
+            ("lat", r'"listing_lat"\s*:\s*([\d.]+)'),
+            ("lon", r'"listing_lon"\s*:\s*([\d.]+)'),
+        )
+        try:
+            for field, pattern in coordinate_patterns:
+                found = re.search(pattern, payload)
+                if found:
+                    info[field] = float(found.group(1))
+        except (ValueError, AttributeError):
+            # A malformed number aborts the coordinate parsing.
+            pass
+
+    # 2. Floor, construction and ownership from <dt>Label</dt><dd>Value</dd> pairs
+    pairs = re.findall(
+        r'<dt[^>]*>(.*?)</dt>\s*<dd[^>]*>(.*?)</dd>',
+        html, re.DOTALL
+    )
+    for label_html, value_html in pairs:
+        label = re.sub(r'<[^>]+>', '', label_html).strip().lower()
+        value = re.sub(r'<[^>]+>', '', value_html).strip()
+
+        if any(word in label for word in ("podlaží", "podlazi", "patro")):
+            # e.g. "2. patro (3. NP)" or "3. podlaží z celkem 5": prefer the
+            # explicit NP number, otherwise take the first number in the value.
+            floor_match = re.search(r'(\d+)\.\s*NP', value) or re.search(r'(\d+)', value)
+            if floor_match:
+                info["floor"] = int(floor_match.group(1))
+
+        if any(word in label for word in ("konstrukce", "stavba")):
+            info["construction"] = value.lower()
+
+        if any(word in label for word in ("vlastnictví", "vlastnictvi")):
+            info["ownership"] = value
+
+    return info
+
+
+def format_price(price: int) -> str:
+    """Return *price* with its digits grouped in threes by spaces, followed by " Kč"."""
+    digits = str(price)
+    # The leading group holds whatever remains once the full triples are taken.
+    lead = len(digits) % 3
+    groups = [digits[:lead]] if lead else []
+    groups.extend(digits[pos:pos + 3] for pos in range(lead, len(digits), 3))
+    return " ".join(groups) + " Kč"
+
+
+def load_cache(json_path: str = "byty_idnes.json") -> dict[str, dict]:
+    """Load the previous run's output as a lookup table keyed by ``hash_id``.
+
+    Args:
+        json_path: Path of the JSON file written by an earlier run.
+
+    Returns:
+        Mapping of ``str(hash_id)`` to the stored entry. Empty when the file
+        is missing, unreadable, not valid JSON, or not a list of objects.
+    """
+    path = Path(json_path)
+    if not path.exists():
+        return {}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError. A
+        # damaged cache only costs re-downloading, so it must not abort the run.
+        return {}
+    if not isinstance(data, list):
+        return {}
+    # Ignore anything that is not an object carrying a hash_id instead of
+    # raising TypeError on unexpected item shapes.
+    return {
+        str(entry["hash_id"]): entry
+        for entry in data
+        if isinstance(entry, dict) and "hash_id" in entry
+    }
+
+
+def scrape():
+    """Scrape Reality iDNES listings and return the ones that pass all filters.
+
+    Works in three steps: download the paginated listing index, pre-filter on
+    price / area / disposition using the index data, then fetch each remaining
+    detail page for GPS, floor and construction. Entries present in the cache
+    from the previous run with an unchanged price are reused without a detail
+    request.
+
+    Returns:
+        List of result dicts (see the ``result`` literal below), mixed with
+        any cached entries that were reused as-is.
+    """
+    cache = load_cache()
+
+    print("=" * 60)
+    print("Stahuji inzeráty z Reality iDNES")
+    print(f"Cena: do {format_price(MAX_PRICE)}")
+    print(f"Min. plocha: {MIN_AREA} m²")
+    print(f"Patro: od {MIN_FLOOR}. NP")
+    print(f"Region: Praha")
+    if cache:
+        print(f"Cache: {len(cache)} bytů z minulého běhu")
+    print("=" * 60)
+
+    # Step 1: Fetch listing pages
+    print("\nFáze 1: Stahování seznamu inzerátů...")
+    all_listings = {}  # id -> listing dict
+    page = 0  # zero-based index passed to build_list_url; printed one-based
+    total = None
+
+    while True:
+        url = build_list_url(page)
+        print(f"  Strana {page + 1} ...")
+        html = fetch_url(url)
+
+        # The total count is parsed only from the first page that is fetched.
+        if total is None:
+            total = parse_total_count(html)
+            total_pages = math.ceil(total / PER_PAGE) if total > 0 else 1
+            print(f"  → Celkem {total} inzerátů, ~{total_pages} stran")
+
+        listings = parse_listings(html)
+
+        # An empty page ends pagination regardless of the reported total.
+        if not listings:
+            break
+
+        # Keep the first occurrence of each listing id.
+        for item in listings:
+            lid = item["id"]
+            if lid not in all_listings:
+                all_listings[lid] = item
+
+        page += 1
+        if total and page >= math.ceil(total / PER_PAGE):
+            break
+        time.sleep(1.0)
+
+    print(f"\n  Staženo: {len(all_listings)} unikátních inzerátů")
+
+    # Step 2: Pre-filter by price and area from list data
+    pre_filtered = []
+    excluded_price = 0
+    excluded_area = 0
+    excluded_disp = 0
+
+    for item in all_listings.values():
+        # Non-positive prices are counted as excluded by price as well.
+        if item["price"] <= 0 or item["price"] > MAX_PRICE:
+            excluded_price += 1
+            continue
+
+        # A missing area (None) is let through; only a known, too-small area is dropped.
+        if item["area"] is not None and item["area"] < MIN_AREA:
+            excluded_area += 1
+            continue
+
+        # "?" is the placeholder for an unknown disposition.
+        if item["disposition"] == "?":
+            excluded_disp += 1
+            continue
+
+        pre_filtered.append(item)
+
+    print(f"\nPo předfiltraci:")
+    print(f"  Vyloučeno (cena):      {excluded_price}")
+    print(f"  Vyloučeno (plocha):    {excluded_area}")
+    print(f"  Vyloučeno (dispozice): {excluded_disp}")
+    print(f"  Zbývá:                 {len(pre_filtered)}")
+
+    # Step 3: Fetch details for GPS, floor, construction
+    print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
+    results = []
+    excluded_panel = 0
+    excluded_floor = 0
+    excluded_no_gps = 0
+    excluded_detail = 0
+    cache_hits = 0
+
+    for i, item in enumerate(pre_filtered):
+        # Check cache — if hash_id exists and price unchanged, reuse
+        # NOTE: a reused entry is appended as stored; the filters below are
+        # not re-applied to it.
+        cached = cache.get(str(item["id"]))
+        if cached and cached.get("price") == item["price"]:
+            cache_hits += 1
+            results.append(cached)
+            continue
+
+        url = item["url"]
+        time.sleep(0.4)
+
+        try:
+            html = fetch_url(url)
+        except Exception as e:
+            # Best effort: a failed detail download only drops this listing.
+            print(f"    Warning: detail failed for {item['id']}: {e}")
+            excluded_detail += 1
+            continue
+
+        detail = parse_detail(html)
+
+        # Must have GPS
+        if not detail.get("lat") or not detail.get("lon"):
+            excluded_no_gps += 1
+            continue
+
+        # Check construction — exclude panel
+        construction = detail.get("construction", "")
+        if "panel" in construction:
+            excluded_panel += 1
+            print(f"  ✗ Vyloučen {item['id'][:12]}...: panel ({construction})")
+            continue
+
+        # Check for sídliště (housing estate) mentioned in the construction
+        # field; it is counted together with panel exclusions.
+        if "sídliště" in construction or "sidliste" in construction:
+            excluded_panel += 1
+            print(f"  ✗ Vyloučen {item['id'][:12]}...: sídliště")
+            continue
+
+        # Check floor
+        # An unknown floor (None) is kept; only a known floor below the minimum is dropped.
+        floor = detail.get("floor")
+        if floor is not None and floor < MIN_FLOOR:
+            excluded_floor += 1
+            continue
+
+        # Map construction to Czech label
+        # Matching is by substring, with and without diacritics; anything
+        # unrecognised is passed through capitalised.
+        building_type = "neuvedeno"
+        if construction:
+            if "cihlo" in construction or "cihla" in construction:
+                building_type = "Cihlová"
+            elif "smíšen" in construction or "smisen" in construction:
+                building_type = "Smíšená"
+            elif "skelet" in construction:
+                building_type = "Skeletová"
+            elif "dřevo" in construction or "drevo" in construction:
+                building_type = "Dřevostavba"
+            elif "mont" in construction:
+                building_type = "Montovaná"
+            else:
+                building_type = construction.capitalize()
+
+        result = {
+            "hash_id": item["id"],
+            "name": f"Prodej bytu {item['disposition']} {item.get('area', '?')} m²",
+            "price": item["price"],
+            "price_formatted": format_price(item["price"]),
+            "locality": item["locality"],
+            "lat": detail["lat"],
+            "lon": detail["lon"],
+            "disposition": item["disposition"],
+            "floor": floor,
+            "area": item["area"],
+            "building_type": building_type,
+            "ownership": detail.get("ownership", "neuvedeno"),
+            "url": item["url"],
+            "source": "idnes",
+            "image": "",
+        }
+        results.append(result)
+
+        # Progress line; only reached for items that were accepted above,
+        # because every skip path uses ``continue``.
+        if (i + 1) % 20 == 0:
+            print(f"  Zpracováno {i + 1}/{len(pre_filtered)} ...")
+
+    print(f"\n{'=' * 60}")
+    print(f"Výsledky Reality iDNES:")
+    print(f"  Předfiltrováno:        {len(pre_filtered)}")
+    print(f"  Z cache (přeskočeno): {cache_hits}")
+    print(f"  Vyloučeno (panel/síd): {excluded_panel}")
+    print(f"  Vyloučeno (patro):     {excluded_floor}")
+    print(f"  Vyloučeno (bez GPS):   {excluded_no_gps}")
+    print(f"  Vyloučeno (bez detailu): {excluded_detail}")
+    print(f"  ✓ Vyhovující byty:    {len(results)}")
+    print(f"{'=' * 60}")
+
+    return results
+
+
+if __name__ == "__main__":
+    # Script entry point: run the scraper and store any matches as JSON.
+    start = time.time()
+    estates = scrape()
+
+    if not estates:
+        print("\nŽádné byty z Reality iDNES neodpovídají kritériím :(")
+    else:
+        json_path = Path("byty_idnes.json")
+        payload = json.dumps(estates, ensure_ascii=False, indent=2)
+        json_path.write_text(payload, encoding="utf-8")
+        elapsed = time.time() - start
+        print(f"\n✓ Data uložena: {json_path.resolve()}")
+        print(f"⏱  Celkový čas: {elapsed:.0f} s")
--- a/scrape_psn.py
+++ b/scrape_psn.py
@@ -0,0 +1,306 @@
+#!/usr/bin/env python3
+"""
+PSN.cz scraper.
+Stáhne byty na prodej v Praze z projektů PSN a vyfiltruje podle kritérií.
+Výstup: byty_psn.json
+"""
+from __future__ import annotations
+
+import json
+import re
+import subprocess
+import time
+from pathlib import Path
+
+# ── Configuration ───────────────────────────────────────────────────────────
+
+# Upper price bound; compared against the unit's price in CZK.
+MAX_PRICE = 14_000_000
+# Lower bound for the unit's area (reported in m²).
+MIN_AREA = 69
+# Lowest accepted floor number; units with a known lower floor are dropped.
+MIN_FLOOR = 2
+
+# Disposition labels that are kept; every other value is filtered out.
+WANTED_DISPOSITIONS = {"3+kk", "3+1", "4+kk", "4+1", "5+kk", "5+1", "6+kk", "6+1"}
+
+# User-Agent header value sent with each request.
+UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+
+BASE_URL = "https://psn.cz"
+
+# Known Prague project slugs with GPS (from research)
+# The lat/lon pair is the fallback used for units that carry no coordinates of their own.
+PRAGUE_PROJECTS = [
+    {"slug": "zit-branik", "name": "Žít Braník", "lat": 50.0353, "lon": 14.4125},
+    {"slug": "rostislavova-4", "name": "Rostislavova 4", "lat": 50.0620, "lon": 14.4463},
+    {"slug": "pod-drinopolem", "name": "Pod Drinopolem", "lat": 50.0851, "lon": 14.3720},
+    {"slug": "skyline-chodov", "name": "Skyline Chodov", "lat": 50.0418, "lon": 14.4990},
+    {"slug": "jitro", "name": "Jitro", "lat": 50.0729, "lon": 14.4768},
+    {"slug": "maroldka", "name": "Maroldka", "lat": 50.0614, "lon": 14.4517},
+    {"slug": "belehradska-29", "name": "Bělehradská 29", "lat": 50.0682, "lon": 14.4348},
+    {"slug": "jeseniova-93", "name": "Jeseniova 93", "lat": 50.0887, "lon": 14.4692},
+    {"slug": "vanguard", "name": "Vanguard", "lat": 50.0164, "lon": 14.4036},
+    {"slug": "vinohradska-160", "name": "Vinohradská 160", "lat": 50.0780, "lon": 14.4653},
+    {"slug": "hermanova24", "name": "Heřmanova 24", "lat": 50.1009, "lon": 14.4313},
+    {"slug": "vinohradska-8", "name": "Vinohradská 8", "lat": 50.0787, "lon": 14.4342},
+    {"slug": "bydleni-na-vysinach", "name": "Bydlení Na Výšinách", "lat": 50.1003, "lon": 14.4187},
+    {"slug": "bydleni-u-pekaren", "name": "Bydlení U Pekáren", "lat": 50.0555, "lon": 14.5414},
+    {"slug": "pechackova-6", "name": "Pechackova 6", "lat": 50.0734, "lon": 14.4063},
+    {"slug": "ahoj-vanguard", "name": "Ahoj Vanguard", "lat": 50.0164, "lon": 14.4033},
+]
+
+
+def fetch_url(url: str) -> str:
+    """Download *url* with the system ``curl`` binary and return the body as text.
+
+    curl is used instead of urllib because urllib's SSL is too old for
+    Cloudflare. Redirects are followed (``-L``) and curl itself gives up
+    after 30 seconds.
+
+    Args:
+        url: Absolute URL to download.
+
+    Returns:
+        Whatever curl wrote to stdout, decoded as UTF-8.
+
+    Raises:
+        RuntimeError: curl exited with a non-zero status.
+        subprocess.TimeoutExpired: curl did not finish within 60 seconds.
+    """
+    command = [
+        "curl", "-s", "-L", "--max-time", "30",
+        "-H", f"User-Agent: {UA}",
+        "-H", "Accept: text/html",
+        url,
+    ]
+    result = subprocess.run(
+        command,
+        capture_output=True,
+        # Decode explicitly instead of with the locale's preferred encoding,
+        # which corrupts Czech text or raises on non-UTF-8 locales.
+        # NOTE(review): assumes the site serves UTF-8 — confirm.
+        encoding="utf-8",
+        errors="replace",
+        timeout=60,
+    )
+    if result.returncode != 0:
+        raise RuntimeError(f"curl failed ({result.returncode}): {result.stderr[:200]}")
+    return result.stdout
+
+
+def extract_units_from_html(html: str) -> list[dict]:
+    """Extract unit JSON objects embedded, with escaped quotes, in raw HTML.
+
+    Each occurrence of the ``"title":"Byt`` marker is traced back to the
+    opening brace of the object that contains it, and that object is decoded
+    as JSON. Only dicts carrying a ``price_czk`` key are kept.
+
+    Args:
+        html: Raw page source.
+
+    Returns:
+        Decoded unit dicts in document order; empty when nothing matches.
+    """
+    # The page carries RSC data as escaped JSON (\"key\":\"value\"), so every
+    # backslash-quote pair is first turned back into a plain quote.
+    cleaned = html.replace('\\"', '"')
+
+    units = []
+    decoder = json.JSONDecoder()
+
+    for m in re.finditer(r'"title":"Byt', cleaned):
+        pos = m.start()
+        # Look back roughly 3000 characters at most. The stop bound is -1 near
+        # the start of the text so that a brace at index 0 is inspected too.
+        stop = max(pos - 3000, -1)
+        depth = 0  # closing braces seen that still need their opening brace
+        for i in range(pos - 1, stop, -1):
+            ch = cleaned[i]
+            if ch == '}':
+                depth += 1
+            elif ch == '{':
+                if depth:
+                    # Opening brace of a nested sibling object; skip past it.
+                    depth -= 1
+                    continue
+                # First unmatched '{' is the object that encloses the marker.
+                try:
+                    obj, _ = decoder.raw_decode(cleaned, i)
+                except ValueError:
+                    # Not valid JSON from here (JSONDecodeError is a ValueError).
+                    pass
+                else:
+                    if isinstance(obj, dict) and 'price_czk' in obj:
+                        units.append(obj)
+                break
+
+    return units
+
+
+def format_price(price: int) -> str:
+    """Return *price* as space-separated groups of three digits plus " Kč"."""
+    text = str(price)
+    # Cut three characters at a time, walking in from the right-hand end.
+    chunks = [text[max(end - 3, 0):end] for end in range(len(text), 0, -3)]
+    chunks.reverse()
+    return " ".join(chunks) + " Kč"
+
+
+def scrape():
+    """Scrape PSN project pages and return the units that pass all filters.
+
+    Downloads up to 10 pages for every entry in ``PRAGUE_PROJECTS``, extracts
+    the embedded unit objects, de-duplicates them by ``slug`` and then filters
+    on availability, category, disposition, price, area, floor and build type.
+
+    Returns:
+        List of result dicts (see the ``result`` literal below).
+    """
+    print("=" * 60)
+    print("Stahuji inzeráty z PSN.cz")
+    print(f"Cena: do {format_price(MAX_PRICE)}")
+    print(f"Min. plocha: {MIN_AREA} m²")
+    print(f"Patro: od {MIN_FLOOR}. NP")
+    print(f"Region: Praha ({len(PRAGUE_PROJECTS)} projektů)")
+    print("=" * 60)
+
+    # Fetch units from each Prague project
+    all_units = []
+
+    for proj in PRAGUE_PROJECTS:
+        page = 1
+        project_units = []
+
+        while True:
+            url = f"{BASE_URL}/projekt/{proj['slug']}?page={page}"
+            print(f"  {proj['name']} — strana {page} ...")
+            time.sleep(0.5)
+
+            try:
+                html = fetch_url(url)
+            except Exception as e:
+                # Best effort: a failed download ends this project's paging
+                # but keeps the units collected so far.
+                print(f"    Chyba: {e}")
+                break
+
+            units = extract_units_from_html(html)
+
+            if not units:
+                if page == 1:
+                    print(f"    → 0 jednotek")
+                break
+
+            # Add project info to each unit
+            for unit in units:
+                # Fall back to the project's coordinates when the unit has none.
+                if not unit.get("latitude") or not unit.get("longitude"):
+                    unit["latitude"] = proj["lat"]
+                    unit["longitude"] = proj["lon"]
+                unit["_project_name"] = proj["name"]
+                unit["_project_slug"] = proj["slug"]
+
+            project_units.extend(units)
+
+            if page == 1:
+                print(f"    → {len(units)} jednotek na stránce")
+
+            # Check if there might be more pages
+            # Only the count is checked: fewer than 10 units is treated as the last page.
+            # NOTE(review): repeated units across pages are not detected here;
+            # they are only removed by the slug de-duplication below.
+            if len(units) < 10:
+                break
+
+            page += 1
+            if page > 10:  # Safety limit
+                break
+
+        all_units.extend(project_units)
+
+    # Deduplicate by slug
+    # Units without a slug cannot be compared and are all kept.
+    seen_slugs = set()
+    unique_units = []
+    for u in all_units:
+        slug = u.get("slug", "")
+        if slug and slug not in seen_slugs:
+            seen_slugs.add(slug)
+            unique_units.append(u)
+        elif not slug:
+            unique_units.append(u)
+
+    print(f"\n  Staženo celkem: {len(unique_units)} unikátních jednotek")
+
+    # Filter
+    print(f"\nFiltrování...")
+    results = []
+    excluded_sold = 0
+    excluded_type = 0
+    excluded_disp = 0
+    excluded_price = 0
+    excluded_area = 0
+    excluded_floor = 0
+    excluded_panel = 0
+
+    for unit in unique_units:
+        # Only free units
+        is_free = unit.get("is_free", False)
+        is_sold = unit.get("is_sold", False)
+        if is_sold or not is_free:
+            excluded_sold += 1
+            continue
+
+        # Only apartments
+        # "ateliér" (studio) categories are accepted alongside "byt" (flat).
+        category = str(unit.get("category", "")).lower()
+        if "byt" not in category and "ateliér" not in category:
+            excluded_type += 1
+            continue
+
+        # Disposition
+        disp = unit.get("disposition", "")
+        if disp not in WANTED_DISPOSITIONS:
+            excluded_disp += 1
+            continue
+
+        # Price
+        # price_czk takes precedence; action_price_czk is used only when it is missing or falsy.
+        price = unit.get("price_czk") or unit.get("action_price_czk") or 0
+        if price <= 0 or price > MAX_PRICE:
+            excluded_price += 1
+            continue
+
+        # Area
+        # total_area takes precedence over floor_area; a missing area counts as 0 and is excluded.
+        area = unit.get("total_area") or unit.get("floor_area") or 0
+        if area < MIN_AREA:
+            excluded_area += 1
+            continue
+
+        # Floor
+        # Accept a plain integer, otherwise the first (possibly negative)
+        # number found in the text; floor stays None when neither works.
+        floor_str = str(unit.get("floor", ""))
+        floor = None
+        if floor_str:
+            try:
+                floor = int(floor_str)
+            except ValueError:
+                floor_match = re.search(r'(-?\d+)', floor_str)
+                if floor_match:
+                    floor = int(floor_match.group(1))
+
+        # An unknown floor is kept; only a known floor below the minimum is dropped.
+        if floor is not None and floor < MIN_FLOOR:
+            excluded_floor += 1
+            continue
+
+        # Construction — check for panel
+        build_type = str(unit.get("build_type", "")).lower()
+        if "panel" in build_type:
+            excluded_panel += 1
+            print(f"  ✗ Vyloučen: panel ({build_type})")
+            continue
+
+        # Build construction label
+        # "nevybráno" (not selected) is treated the same as a missing value.
+        building_type = "neuvedeno"
+        if build_type and build_type != "nevybráno":
+            if "cihlo" in build_type or "cihla" in build_type:
+                building_type = "Cihlová"
+            elif "skelet" in build_type:
+                building_type = "Skeletová"
+            else:
+                building_type = build_type.capitalize()
+
+        lat = unit.get("latitude", 0)
+        lon = unit.get("longitude", 0)
+
+        # Link to the unit page when a slug exists, otherwise to the project page.
+        slug = unit.get("slug", "")
+        project_slug = unit.get("_project_slug", "")
+        detail_url = f"{BASE_URL}/projekt/{project_slug}/{slug}" if slug else f"{BASE_URL}/projekt/{project_slug}"
+
+        result = {
+            "hash_id": unit.get("id", slug),
+            "name": f"Prodej bytu {disp} {area} m² — {unit.get('_project_name', '')}",
+            "price": int(price),
+            "price_formatted": format_price(int(price)),
+            "locality": f"{unit.get('street', unit.get('_project_name', ''))}, Praha",
+            "lat": lat,
+            "lon": lon,
+            "disposition": disp,
+            "floor": floor,
+            "area": area,
+            "building_type": building_type,
+            "ownership": unit.get("ownership", "neuvedeno") or "neuvedeno",
+            "url": detail_url,
+            "source": "psn",
+            "image": "",
+        }
+        results.append(result)
+
+    print(f"\n{'=' * 60}")
+    print(f"Výsledky PSN:")
+    print(f"  Celkem jednotek:       {len(unique_units)}")
+    print(f"  Vyloučeno (prodáno):   {excluded_sold}")
+    print(f"  Vyloučeno (typ):       {excluded_type}")
+    print(f"  Vyloučeno (dispozice): {excluded_disp}")
+    print(f"  Vyloučeno (cena):      {excluded_price}")
+    print(f"  Vyloučeno (plocha):    {excluded_area}")
+    print(f"  Vyloučeno (patro):     {excluded_floor}")
+    print(f"  Vyloučeno (panel):     {excluded_panel}")
+    print(f"  ✓ Vyhovující byty:    {len(results)}")
+    print(f"{'=' * 60}")
+
+    return results
+
+
+if __name__ == "__main__":
+    # Script entry point: run the scraper and store any matches as JSON.
+    start = time.time()
+    estates = scrape()
+
+    if not estates:
+        print("\nŽádné byty z PSN neodpovídají kritériím :(")
+    else:
+        json_path = Path("byty_psn.json")
+        payload = json.dumps(estates, ensure_ascii=False, indent=2)
+        json_path.write_text(payload, encoding="utf-8")
+        elapsed = time.time() - start
+        print(f"\n✓ Data uložena: {json_path.resolve()}")
+        print(f"⏱  Celkový čas: {elapsed:.0f} s")
--- a/scrape_realingo.py
+++ b/scrape_realingo.py
@@ -0,0 +1,311 @@
+#!/usr/bin/env python3
+"""
+Realingo.cz scraper.
+Stáhne byty na prodej v Praze a vyfiltruje podle kritérií.
+Výstup: byty_realingo.json
+"""
+from __future__ import annotations
+
+import json
+import math
+import re
+import time
+import urllib.request
+from pathlib import Path
+
+# ── Configuration (shared with the Sreality scraper) ────────────────────────
+
+MAX_PRICE = 13_500_000
+MIN_AREA = 69
+MIN_FLOOR = 2
+PER_PAGE = 40  # Realingo returns 40 per page
+
+# Categories we want (dispositions 3+kk and larger)
+WANTED_CATEGORIES = {
+    "FLAT3_KK", "FLAT31",   # 3+kk, 3+1
+    "FLAT4_KK", "FLAT41",   # 4+kk, 4+1
+    "FLAT5_KK", "FLAT51",   # 5+kk, 5+1
+    "FLAT6",                 # 6+
+    "OTHERS_FLAT",           # atypical — we check the area
+}
+
+# Mapping category → label
+CATEGORY_LABELS = {
+    "FLAT1_KK": "1+kk", "FLAT11": "1+1",
+    "FLAT2_KK": "2+kk", "FLAT21": "2+1",
+    "FLAT3_KK": "3+kk", "FLAT31": "3+1",
+    "FLAT4_KK": "4+kk", "FLAT41": "4+1",
+    "FLAT5_KK": "5+kk", "FLAT51": "5+1",
+    "FLAT6": "6+",
+    "OTHERS_FLAT": "Atypický",
+}
+
+# HTTP headers sent with every request.
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
+    "Accept": "text/html,application/xhtml+xml",
+}
+
+BASE_URL = "https://www.realingo.cz"
+
+
+def fetch_listing_page(page: int = 1) -> tuple[list[dict], int]:
+    """Fetch one page of Prague listings.
+
+    Args:
+        page: One-based page number; page 1 uses the bare listing URL, later
+            pages use the ``{page}_strana/`` suffix.
+
+    Returns:
+        ``(items, total_count)`` read from the page's ``__NEXT_DATA__`` JSON,
+        or ``([], 0)`` when the page contains no such script tag.
+
+    Raises:
+        urllib.error.URLError: The request failed.
+        KeyError: The embedded JSON lacks the expected offer-list path.
+    """
+    if page == 1:
+        url = f"{BASE_URL}/prodej_byty/praha/"
+    else:
+        url = f"{BASE_URL}/prodej_byty/praha/{page}_strana/"
+
+    req = urllib.request.Request(url, headers=HEADERS)
+    # The context manager closes the HTTP response even if reading fails.
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        html = resp.read().decode("utf-8")
+
+    match = re.search(
+        r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+        html, re.DOTALL
+    )
+    if not match:
+        return [], 0
+
+    data = json.loads(match.group(1))
+    offer_list = data["props"]["pageProps"]["store"]["offer"]["list"]
+    return offer_list["data"], offer_list["total"]
+
+
+def fetch_detail(listing_url: str) -> dict | None:
+    """Fetch a listing's detail page and return its detail record.
+
+    Best effort: any failure (network, decoding, unexpected JSON shape) is
+    printed as a warning and reported as ``None`` rather than raised.
+
+    Args:
+        listing_url: Site-relative URL of the listing; appended to ``BASE_URL``.
+
+    Returns:
+        The first entry of the page's ``offer.details`` mapping, or ``None``
+        when the page has no ``__NEXT_DATA__`` script, the mapping is empty,
+        or an error occurred.
+    """
+    try:
+        url = f"{BASE_URL}{listing_url}"
+        req = urllib.request.Request(url, headers=HEADERS)
+        # The context manager closes the HTTP response even if reading fails.
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            html = resp.read().decode("utf-8")
+
+        match = re.search(
+            r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+            html, re.DOTALL
+        )
+        if not match:
+            return None
+
+        data = json.loads(match.group(1))
+        details = data["props"]["pageProps"]["store"]["offer"]["details"]
+        # The mapping is expected to hold a single entry; take the first one.
+        return next(iter(details.values()), None)
+    except Exception as e:
+        print(f"    Warning: detail fetch failed for {listing_url}: {e}")
+    return None
+
+
+def format_price(price: int) -> str:
+    """Return *price* with a space between each group of three digits, plus " Kč"."""
+    # Work on the reversed text so that groups are counted from the right.
+    backwards = str(price)[::-1]
+    groups = [backwards[i:i + 3][::-1] for i in range(0, len(backwards), 3)]
+    return " ".join(groups[::-1]) + " Kč"
+
+
+def load_cache(json_path: str = "byty_realingo.json") -> dict[int, dict]:
+    """Load the previous run's output as a lookup table keyed by ``hash_id``.
+
+    Args:
+        json_path: Path of the JSON file written by an earlier run.
+
+    Returns:
+        Mapping of each entry's ``hash_id`` (as stored) to the entry. Empty
+        when the file is missing, unreadable, not valid JSON, or not a list
+        of objects.
+    """
+    path = Path(json_path)
+    if not path.exists():
+        return {}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError. A
+        # damaged cache only costs re-downloading, so it must not abort the run.
+        return {}
+    if not isinstance(data, list):
+        return {}
+    # Ignore anything that is not an object carrying a hash_id instead of
+    # raising TypeError on unexpected item shapes.
+    return {
+        entry["hash_id"]: entry
+        for entry in data
+        if isinstance(entry, dict) and "hash_id" in entry
+    }
+
+
+def scrape():
+    """Scrape Realingo listings for Prague and return those passing all filters.
+
+    Works in three steps: download the paginated listing index, pre-filter on
+    category / price / area / GPS using the index data, then fetch each
+    remaining detail page for building type, building position and floor.
+    Entries present in the cache from the previous run with an unchanged
+    price are reused without a detail request.
+
+    Returns:
+        List of result dicts (see the ``result`` literal below), mixed with
+        any cached entries that were reused as-is.
+    """
+    cache = load_cache()
+
+    print("=" * 60)
+    print("Stahuji inzeráty z Realingo.cz")
+    print(f"Cena: do {format_price(MAX_PRICE)}")
+    print(f"Min. plocha: {MIN_AREA} m²")
+    print(f"Patro: od {MIN_FLOOR}. NP")
+    print(f"Region: Praha")
+    if cache:
+        print(f"Cache: {len(cache)} bytů z minulého běhu")
+    print("=" * 60)
+
+    # Step 1: Fetch all listing pages
+    print("\nFáze 1: Stahování seznamu inzerátů...")
+    all_listings = []
+    page = 1
+    total = None
+
+    while True:
+        print(f"  Strana {page} ...")
+        items, total_count = fetch_listing_page(page)
+        # The total reported by the first page fixes the number of pages.
+        if total is None:
+            total = total_count
+            total_pages = math.ceil(total / PER_PAGE)
+            print(f"  → Celkem {total} inzerátů, {total_pages} stran")
+
+        # An empty page ends pagination regardless of the reported total.
+        if not items:
+            break
+
+        all_listings.extend(items)
+        page += 1
+        if page > total_pages:
+            break
+        time.sleep(0.5)
+
+    print(f"\n  Staženo: {len(all_listings)} inzerátů")
+
+    # Step 2: Pre-filter by category, price, area from listing data
+    pre_filtered = []
+    excluded_category = 0
+    excluded_price = 0
+    excluded_area = 0
+    excluded_no_gps = 0
+
+    for item in all_listings:
+        cat = item.get("category", "")
+        if cat not in WANTED_CATEGORIES:
+            excluded_category += 1
+            continue
+
+        # A missing or zero price counts as excluded by price.
+        price = item.get("price", {}).get("total", 0) or 0
+        if price > MAX_PRICE or price == 0:
+            excluded_price += 1
+            continue
+
+        # A missing area is let through; only a known, too-small area is dropped.
+        area = item.get("area", {}).get("main")
+        if area is not None and area < MIN_AREA:
+            excluded_area += 1
+            continue
+
+        loc = item.get("location", {})
+        if not loc.get("latitude") or not loc.get("longitude"):
+            excluded_no_gps += 1
+            continue
+
+        pre_filtered.append(item)
+
+    print(f"\nPo předfiltraci:")
+    print(f"  Vyloučeno (dispozice): {excluded_category}")
+    print(f"  Vyloučeno (cena):      {excluded_price}")
+    print(f"  Vyloučeno (plocha):    {excluded_area}")
+    print(f"  Vyloučeno (bez GPS):   {excluded_no_gps}")
+    print(f"  Zbývá:                 {len(pre_filtered)}")
+
+    # Step 3: Fetch details for remaining listings (floor, building type)
+    print(f"\nFáze 2: Stahování detailů ({len(pre_filtered)} bytů)...")
+    results = []
+    excluded_panel = 0
+    excluded_floor = 0
+    excluded_detail = 0
+    cache_hits = 0
+
+    # Label tables are loop-invariant, so they are built once up front
+    # instead of on every iteration.
+    bt_map = {
+        "BRICK": "Cihlová",
+        "PANEL": "Panelová",
+        "WOOD": "Dřevostavba",
+        "STEEL": "Ocelová",
+        "MIXED": "Smíšená",
+        "MONTAGE": "Montovaná",
+    }
+    ownership_map = {
+        "PRIVATE": "Osobní",
+        "COOPERATIVE": "Družstevní",
+        "STATE": "Státní/obecní",
+    }
+
+    for i, item in enumerate(pre_filtered):
+        # Check cache — if hash_id exists and price unchanged, reuse.
+        # A reused entry is appended as stored; the filters below are not
+        # re-applied to it.
+        item_id = int(item["id"])
+        # Step 2 only lets through listings with a truthy price, so this
+        # equals the raw "total" value and can be reused for the result.
+        item_price = item.get("price", {}).get("total", 0) or 0
+        cached = cache.get(item_id)
+        if cached and cached.get("price") == item_price:
+            cache_hits += 1
+            results.append(cached)
+            continue
+
+        time.sleep(0.3)
+        detail_data = fetch_detail(item["url"])
+
+        if not detail_data:
+            excluded_detail += 1
+            continue
+
+        # The detail record is looked up under offer.detail first, then
+        # directly under "detail".
+        detail = detail_data.get("offer", {}).get("detail", {})
+        if not detail and "detail" in detail_data:
+            detail = detail_data["detail"]
+
+        # Check building type — exclude panel
+        building_type = detail.get("buildingType", "")
+        if building_type == "PANEL":
+            excluded_panel += 1
+            print(f"  ✗ Vyloučen #{item['id']}: panel")
+            continue
+
+        # Check building position — exclude sídliště (housing estate);
+        # counted together with panel exclusions.
+        building_position = detail.get("buildingPosition", "")
+        if building_position and "ESTATE" in str(building_position).upper():
+            excluded_panel += 1
+            print(f"  ✗ Vyloučen #{item['id']}: sídliště")
+            continue
+
+        # Check floor: an unknown floor is kept; only a known floor below
+        # the minimum is dropped.
+        floor = detail.get("floor")
+        if floor is not None and floor < MIN_FLOOR:
+            excluded_floor += 1
+            continue
+
+        cat = item.get("category", "")
+        loc = item.get("location", {})
+
+        result = {
+            "hash_id": item_id,
+            "name": f"Prodej bytu {CATEGORY_LABELS.get(cat, '?')} {item.get('area', {}).get('main', '?')} m²",
+            "price": item_price,
+            "price_formatted": format_price(item_price),
+            "locality": loc.get("address", "Praha"),
+            "lat": loc["latitude"],
+            "lon": loc["longitude"],
+            "disposition": CATEGORY_LABELS.get(cat, "?"),
+            "floor": floor,
+            "area": item.get("area", {}).get("main"),
+            "building_type": bt_map.get(building_type, building_type or "neuvedeno"),
+            "ownership": ownership_map.get(detail.get("ownership", ""), detail.get("ownership") or "neuvedeno"),
+            "url": f"{BASE_URL}{item['url']}",
+            "source": "realingo",
+            "image": "",
+        }
+        results.append(result)
+
+        # Progress line; only reached for items that were accepted above,
+        # because every skip path uses ``continue``.
+        if (i + 1) % 20 == 0:
+            print(f"  Zpracováno {i + 1}/{len(pre_filtered)} ...")
+
+    print(f"\n{'=' * 60}")
+    print(f"Výsledky Realingo:")
+    print(f"  Předfiltrováno:        {len(pre_filtered)}")
+    print(f"  Z cache (přeskočeno): {cache_hits}")
+    print(f"  Vyloučeno (panel/síd): {excluded_panel}")
+    print(f"  Vyloučeno (patro):     {excluded_floor}")
+    print(f"  Vyloučeno (bez detailu): {excluded_detail}")
+    print(f"  ✓ Vyhovující byty:    {len(results)}")
+    print(f"{'=' * 60}")
+
+    return results
+
+
+if __name__ == "__main__":
+    # Script entry point: run the scraper and store any matches as JSON.
+    start = time.time()
+    estates = scrape()
+
+    if not estates:
+        print("\nŽádné byty z Realinga neodpovídají kritériím :(")
+    else:
+        json_path = Path("byty_realingo.json")
+        payload = json.dumps(estates, ensure_ascii=False, indent=2)
+        json_path.write_text(payload, encoding="utf-8")
+        elapsed = time.time() - start
+        print(f"\n✓ Data uložena: {json_path.resolve()}")
+        print(f"⏱  Celkový čas: {elapsed:.0f} s")