Add first_seen/last_updated timestamps to track property freshness

Each property record now carries two date fields:
- first_seen: date the listing first appeared (preserved across runs)
- last_updated: date of the most recent scrape that included it

All 6 scrapers (Sreality, Realingo, Bezrealitky, iDNES, PSN, CityHome)
set these fields during scraping. Cached results preserve first_seen and
refresh last_updated. PSN and CityHome gain a load_previous() helper to
track first_seen across runs (they lacked caching before).

The merge script keeps the earliest first_seen and latest last_updated
when deduplicating listings across sources.

The HTML map now shows dates in popups ("Přidáno: DD.MM.YYYY"), displays
a green "NOVÉ" badge on newly discovered listings, and adds a "Přidáno"
dropdown filter (24h / 3 days / 7 days / 14 days) for spotting new ones.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jan Novak
2026-02-15 21:03:08 +01:00
parent c6089f0da9
commit 0b95c847c4
9 changed files with 1604 additions and 11509 deletions

View File

@@ -272,9 +272,13 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
# Check cache — if hash_id exists and price unchanged, reuse
cached = cache.get(hash_id)
today = datetime.now().strftime("%Y-%m-%d")
if cached and cached.get("price") == estate.get("price", 0):
cache_hits += 1
logger.debug(f"Cache hit for hash_id={hash_id}")
cached["last_updated"] = today
if "first_seen" not in cached:
cached["first_seen"] = today
results.append(cached)
continue
@@ -332,6 +336,11 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
disp_cb = estate.get("_disposition_cb") or estate.get("seo", {}).get("category_sub_cb")
seo = estate.get("seo", {})
# Preserve first_seen from cache if this is a price-changed re-fetch
first_seen = today
if cached and "first_seen" in cached:
first_seen = cached["first_seen"]
result = {
"hash_id": hash_id,
"name": estate.get("name", ""),
@@ -347,6 +356,8 @@ def scrape(max_pages: int | None = None, max_properties: int | None = None):
"ownership": ownership,
"url": sreality_url(hash_id, seo),
"image": (estate.get("_links", {}).get("images", [{}])[0].get("href", "") if estate.get("_links", {}).get("images") else ""),
"first_seen": first_seen,
"last_updated": today,
}
results.append(result)
details_fetched += 1
@@ -384,6 +395,12 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
"6+": "#607D8B", # grey-blue
}
def fmt_date(d):
    """Format an ISO date (YYYY-MM-DD) as a Czech date (DD.MM.YYYY).

    Args:
        d: Date string in the exact form ``YYYY-MM-DD``; may be empty or None.

    Returns:
        The date as ``DD.MM.YYYY``, or "" when ``d`` is missing, is not a
        string, or does not have the ``dddd-dd-dd`` shape.
    """
    if not isinstance(d, str) or len(d) != 10:
        return ""
    year, month, day = d[:4], d[5:7], d[8:10]
    digits = year + month + day
    # The result is embedded in popup HTML, so only reformat values that
    # really look like an ISO date; anything else is dropped rather than
    # rendered as a scrambled string.
    if d[4] != "-" or d[7] != "-" or not (digits.isascii() and digits.isdigit()):
        return ""
    return f"{day}.{month}.{year}"
markers_js = ""
for e in estates:
color = color_map.get(e["disposition"], "#999999")
@@ -404,17 +421,42 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
source_color = source_colors.get(source, "#999")
hash_id = e.get("hash_id", "")
first_seen = e.get("first_seen", "")
last_updated = e.get("last_updated", "")
first_seen_fmt = fmt_date(first_seen)
last_updated_fmt = fmt_date(last_updated)
# "NOVÉ" badge if first_seen equals latest scrape date
new_badge = ""
if first_seen and first_seen == last_updated:
new_badge = (
'<span style="margin-left:6px;font-size:10px;background:#4CAF50;color:white;'
'padding:1px 5px;border-radius:3px;font-weight:bold;">NOVÉ</span>'
)
# Date info line
date_line = ""
if first_seen_fmt:
date_line = (
f'<div style="margin-top:4px;font-size:11px;color:#888;">'
f'Přidáno: {first_seen_fmt}'
)
if last_updated_fmt and last_updated != first_seen:
date_line += f' · Aktualizace: {last_updated_fmt}'
date_line += '</div>'
popup = (
f'<div style="min-width:280px;font-family:system-ui,sans-serif;" data-hashid="{hash_id}">'
f'<b style="font-size:14px;">{format_price(e["price"])}</b>'
f'<span style="margin-left:8px;font-size:11px;background:{source_color};color:white;'
f'padding:1px 6px;border-radius:3px;">{source_label}</span><br>'
f'padding:1px 6px;border-radius:3px;">{source_label}</span>{new_badge}<br>'
f'<span style="color:#666;">{e["disposition"]} | {area_text} | {floor_text}</span>'
f'{floor_note}<br><br>'
f'<b>{e["locality"]}</b><br>'
f'Stavba: {building_text}<br>'
f'Vlastnictví: {ownership_text}<br><br>'
f'Vlastnictví: {ownership_text}'
f'{date_line}<br>'
f'<a href="{e["url"]}" target="_blank" '
f'style="color:{source_color};text-decoration:none;font-weight:bold;">'
f'→ Otevřít na {source_label}</a>'
@@ -440,7 +482,7 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
is_fav = source in ("psn", "cityhome")
marker_fn = "addHeartMarker" if is_fav else "addMarker"
markers_js += (
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}');\n"
f" {marker_fn}({e['lat']}, {e['lon']}, '{color}', '{popup}', '{hash_id}', '{first_seen}');\n"
)
# Build legend
@@ -552,6 +594,17 @@ def generate_map(estates: list[dict], output_path: str = "mapa_bytu.html"):
</select>
</label>
</div>
<div style="margin-top:6px;">
<label>Přidáno:
<select id="first-seen-filter" onchange="applyFilters()">
<option value="all">Vše</option>
<option value="1">Posledních 24h</option>
<option value="3">Poslední 3 dny</option>
<option value="7">Poslední týden</option>
<option value="14">Posledních 14 dní</option>
</select>
</label>
</div>
</div>
<div class="filter-section">
<div id="rating-counts" style="margin-bottom:6px;font-size:12px;color:#666;">
@@ -583,7 +636,7 @@ L.tileLayer('https://{{s}}.basemaps.cartocdn.com/light_only_labels/{{z}}/{{x}}/{
var allMarkers = [];
function addMarker(lat, lon, color, popup, hashId) {{
function addMarker(lat, lon, color, popup, hashId, firstSeen) {{
var marker = L.circleMarker([lat, lon], {{
radius: 8,
fillColor: color,
@@ -592,7 +645,7 @@ function addMarker(lat, lon, color, popup, hashId) {{
opacity: 1,
fillOpacity: 0.85,
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId }};
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, firstSeen: firstSeen }};
allMarkers.push(marker);
marker.addTo(map);
}}
@@ -612,11 +665,11 @@ function heartIcon(color) {{
}});
}}
function addHeartMarker(lat, lon, color, popup, hashId) {{
function addHeartMarker(lat, lon, color, popup, hashId, firstSeen) {{
var marker = L.marker([lat, lon], {{
icon: heartIcon(color),
}}).bindPopup(popup);
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true }};
marker._data = {{ lat: lat, lon: lon, color: color, hashId: hashId, isHeart: true, firstSeen: firstSeen }};
allMarkers.push(marker);
marker.addTo(map);
}}
@@ -807,13 +860,25 @@ map.on('popupopen', function(e) {{
}});
// ── Filters ────────────────────────────────────────────────────
// Return the local calendar date `days` days before today as 'YYYY-MM-DD'.
// The result is compared as a plain string against each marker's firstSeen
// value, so it must use the same zero-padded ISO layout.
function daysAgoDate(days) {{
    var d = new Date();
    d.setDate(d.getDate() - days);
    // Format from the local date fields. toISOString() converts to UTC first,
    // which shifts the cutoff to the previous day shortly after local midnight
    // in zones east of UTC.
    var month = ('0' + (d.getMonth() + 1)).slice(-2);
    var day = ('0' + d.getDate()).slice(-2);
    return d.getFullYear() + '-' + month + '-' + day;
}}
function applyFilters() {{
var minFloor = parseInt(document.getElementById('min-floor').value);
var maxPrice = parseInt(document.getElementById('max-price').value);
var hideRejected = document.getElementById('hide-rejected').checked;
var firstSeenVal = document.getElementById('first-seen-filter').value;
var ratings = loadRatings();
var visible = 0;
var minFirstSeen = '';
if (firstSeenVal !== 'all') {{
minFirstSeen = daysAgoDate(parseInt(firstSeenVal));
}}
allMarkers.forEach(function(m) {{
var popup = m.getPopup().getContent();
var floorMatch = popup.match(/(\\d+)\\. NP/);
@@ -826,6 +891,11 @@ function applyFilters() {{
if (floor !== null && floor < minFloor) show = false;
if (price > maxPrice) show = false;
// Date filter
if (minFirstSeen && m._data.firstSeen) {{
if (m._data.firstSeen < minFirstSeen) show = false;
}}
var r = ratings[m._data.hashId];
if (hideRejected && r && r.status === 'reject') show = false;