#!/usr/bin/env python3 """ General-purpose HTTP server for maru-hleda-byt. Serves static files from DATA_DIR and additionally handles: GET /scrapers-status → SSR scraper status page GET /api/ratings → ratings.json contents POST /api/ratings → save entire ratings object GET /api/ratings/export → same as GET, with download header GET /api/status → status.json contents (JSON) GET /api/status/history → scraper_history.json contents (JSON) """ from __future__ import annotations import functools import json import logging import os import sys from datetime import datetime from http.server import HTTPServer, SimpleHTTPRequestHandler from pathlib import Path PORT = int(os.environ.get("SERVER_PORT", 8080)) DATA_DIR = Path(os.environ.get("DATA_DIR", ".")) RATINGS_FILE = DATA_DIR / "ratings.json" _LOG_LEVEL = getattr(logging, os.environ.get("LOG_LEVEL", "INFO").upper(), logging.INFO) logging.basicConfig( level=_LOG_LEVEL, format="%(asctime)s [server] %(levelname)s %(message)s", datefmt="%Y-%m-%dT%H:%M:%S", ) log = logging.getLogger(__name__) # ── Helpers ────────────────────────────────────────────────────────────────── COLORS = { "sreality": "#1976D2", "realingo": "#7B1FA2", "bezrealitky": "#E65100", "idnes": "#C62828", "psn": "#2E7D32", "cityhome": "#00838F", } MONTHS_CZ = [ "ledna", "února", "března", "dubna", "května", "června", "července", "srpna", "září", "října", "listopadu", "prosince", ] def _load_json(path: Path, default=None): """Read and parse JSON file; return default on missing or parse error.""" log.debug("_load_json: %s", path.resolve()) try: if path.exists(): return json.loads(path.read_text(encoding="utf-8")) except Exception as e: log.warning("Failed to load %s: %s", path, e) return default def _fmt_date(iso_str: str) -> str: """Format ISO timestamp as Czech date string.""" try: d = datetime.fromisoformat(iso_str) return f"{d.day}. {MONTHS_CZ[d.month - 1]} {d.year}, {d.hour:02d}:{d.minute:02d}" except Exception: return iso_str def load_ratings() -> dict: return _load_json(RATINGS_FILE, default={}) def save_ratings(data: dict) -> None: RATINGS_FILE.write_text( json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8", ) # ── SSR status page ────────────────────────────────────────────────────────── _CSS = """\ * { margin: 0; padding: 0; box-sizing: border-box; } body { font-family: system-ui, -apple-system, sans-serif; background: #f5f5f5; color: #333; padding: 24px; max-width: 640px; margin: 0 auto; } h1 { font-size: 22px; margin-bottom: 4px; } .subtitle { color: #888; font-size: 13px; margin-bottom: 24px; } .card { background: white; border-radius: 12px; padding: 20px; box-shadow: 0 1px 4px rgba(0,0,0,0.08); margin-bottom: 16px; } .card h2 { font-size: 15px; margin-bottom: 12px; color: #555; } .timestamp { font-size: 28px; font-weight: 700; color: #1976D2; } .timestamp-sub { font-size: 13px; color: #999; margin-top: 2px; } .summary-row { display: flex; justify-content: space-between; align-items: center; padding: 10px 0; border-bottom: 1px solid #f0f0f0; } .summary-row:last-child { border-bottom: none; } .summary-label { font-size: 13px; color: #666; } .summary-value { font-size: 18px; font-weight: 700; } .badge { display: inline-block; padding: 2px 8px; border-radius: 4px; font-size: 11px; font-weight: 600; color: white; } .badge-ok { background: #4CAF50; } .badge-err { background: #F44336; } .badge-skip { background: #FF9800; } .bar-row { display: flex; align-items: center; gap: 8px; margin: 4px 0; } .bar-track { flex: 1; height: 20px; background: #f0f0f0; border-radius: 4px; overflow: hidden; } .bar-fill { height: 100%; border-radius: 4px; } .bar-count { font-size: 12px; width: 36px; font-variant-numeric: tabular-nums; } .loader-wrap { display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 60px 0; } .spinner { width: 40px; height: 40px; border: 4px solid #e0e0e0; border-top-color: #1976D2; border-radius: 50%; animation: spin 0.8s linear infinite; } @keyframes spin { to { transform: rotate(360deg); } } .loader-text { margin-top: 16px; color: #999; font-size: 14px; } .link-row { text-align: center; margin-top: 8px; } .link-row a { color: #1976D2; text-decoration: none; font-size: 14px; } .history-table { width: 100%; border-collapse: collapse; font-size: 12px; } .history-table th { text-align: left; font-weight: 600; color: #999; font-size: 11px; padding: 4px 6px 8px 6px; border-bottom: 2px solid #f0f0f0; } .history-table td { padding: 7px 6px; border-bottom: 1px solid #f5f5f5; vertical-align: middle; } .history-table tr:last-child td { border-bottom: none; } .history-table tr.latest td { background: #f8fbff; font-weight: 600; } .src-nums { display: flex; gap: 4px; flex-wrap: wrap; } .src-chip { display: inline-block; padding: 1px 5px; border-radius: 3px; font-size: 10px; color: white; font-variant-numeric: tabular-nums; } .clickable-row { cursor: pointer; } .clickable-row:hover td { background: #f0f7ff !important; } /* Modal */ #md-overlay { position: fixed; inset: 0; background: rgba(0,0,0,0.45); display: flex; align-items: flex-start; justify-content: center; z-index: 1000; padding: 40px 16px; overflow-y: auto; } #md-box { background: white; border-radius: 12px; padding: 24px; width: 100%; max-width: 620px; position: relative; box-shadow: 0 8px 32px rgba(0,0,0,0.24); margin: auto; } #md-close { position: absolute; top: 10px; right: 14px; background: none; border: none; font-size: 26px; cursor: pointer; color: #aaa; line-height: 1; } #md-close:hover { color: #333; } #md-box h3 { font-size: 15px; margin-bottom: 14px; padding-right: 24px; } .md-summary { display: flex; gap: 20px; flex-wrap: wrap; font-size: 13px; margin-bottom: 16px; color: #555; } .md-summary b { color: #333; } .detail-table { width: 100%; border-collapse: collapse; font-size: 12px; } .detail-table th { text-align: left; color: #999; font-size: 11px; font-weight: 600; padding: 4px 8px 6px 0; border-bottom: 2px solid #f0f0f0; white-space: nowrap; } .detail-table td { padding: 6px 8px 6px 0; border-bottom: 1px solid #f5f5f5; vertical-align: top; } .detail-table tr:last-child td { border-bottom: none; } """ _SOURCE_ORDER = ["Sreality", "Realingo", "Bezrealitky", "iDNES", "PSN", "CityHome"] _SOURCE_ABBR = ["Sre", "Rea", "Bez", "iDN", "PSN", "CH"] def _sources_html(sources: list) -> str: if not sources: return "" max_count = max((s.get("accepted", 0) for s in sources), default=1) or 1 parts = ['

Zdroje

'] for s in sources: name = s.get("name", "?") accepted = s.get("accepted", 0) error = s.get("error") exc = s.get("excluded", {}) excluded_total = sum(exc.values()) if isinstance(exc, dict) else s.get("excluded_total", 0) color = COLORS.get(name.lower(), "#999") pct = round(accepted / max_count * 100) if max_count else 0 if error: badge = 'chyba' elif accepted == 0: badge = '0' else: badge = 'OK' parts.append( f'
' f'
' f'{name} {badge}' f'{excluded_total} vyloučených' f'
' f'
' f'
' f'{accepted}' f'
' ) parts.append("
") return "".join(parts) def _history_html(history: list) -> str: if not history: return "" rows = list(reversed(history)) parts = [ '
' '

Historie běhů – klikni pro detaily

', '', '', '', ] for i, entry in enumerate(rows): row_class = ' class="latest clickable-row"' if i == 0 else ' class="clickable-row"' src_map = {s["name"]: s for s in entry.get("sources", []) if "name" in s} chips = "".join( f'' f'{abbr} {src_map[name].get("accepted", 0) if name in src_map else "-"}' for name, abbr in zip(_SOURCE_ORDER, _SOURCE_ABBR) ) ok_badge = ( 'chyba' if entry.get("success") is False else 'OK' ) dur = f'{entry["duration_sec"]}s' if entry.get("duration_sec") is not None else "-" parts.append( f'' f'' f'' f'' f'' f'' f'' ) parts.append("
DatumTrváníPřijato / DedupZdrojeOK
{_fmt_date(entry.get("timestamp", ""))}{dur}{entry.get("total_accepted", "-")} / {entry.get("deduplicated", "-")}
{chips}
{ok_badge}
") return "".join(parts) def _modal_script(rows_json: str) -> str: """Return the modal overlay HTML + JS for the history detail popup.""" return ( '\n' '' ) def _render_status_html(status: dict | None, history: list, is_running: bool = False) -> str: """Generate the complete HTML page for /scrapers-status.""" head_open = ( '\n\n\n' '\n' '\n' f'Scraper status\n\n' ) page_header = '

Scraper status

\n
maru-hleda-byt
\n' footer = '' if status is None: return ( head_open + '\n\n' + page_header + '

Status není k dispozici.

\n' + footer + '\n\n' ) if is_running: return ( head_open + '\n' + '\n\n' + page_header + '
' + '
Scraper právě běží…
\n' + footer + '\n\n' ) # ── Done state ──────────────────────────────────────────────────────────── ts = status.get("timestamp", "") duration = status.get("duration_sec") total_accepted = status.get("total_accepted", 0) deduplicated = status.get("deduplicated") ts_card = ( '

Poslední scrape

' f'
{_fmt_date(ts)}
' + (f'
Trvání: {round(duration)}s
' if duration is not None else "") + '
' ) sum_card = ( '

Souhrn

' f'
Vyhovujících bytů' f'{total_accepted}
' + ( f'
Po deduplikaci (v mapě)' f'{deduplicated}
' if deduplicated is not None else "" ) + '
' ) rows_for_js = list(reversed(history)) body = ( page_header + ts_card + "\n" + sum_card + "\n" + _sources_html(status.get("sources", [])) + "\n" + _history_html(history) + "\n" + footer ) modal = _modal_script(json.dumps(rows_for_js, ensure_ascii=False)) return head_open + '\n\n' + body + '\n' + modal + '\n\n' # ── HTTP handler ────────────────────────────────────────────────────────────── class Handler(SimpleHTTPRequestHandler): def log_message(self, format, *args): pass # suppress default access log; use our own where needed def _send_json(self, status: int, body, extra_headers=None): payload = json.dumps(body, ensure_ascii=False).encode("utf-8") self.send_response(status) self.send_header("Content-Type", "application/json; charset=utf-8") self.send_header("Content-Length", str(len(payload))) self.send_header("Access-Control-Allow-Origin", "*") self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS") self.send_header("Access-Control-Allow-Headers", "Content-Type") if extra_headers: for k, v in extra_headers.items(): self.send_header(k, v) self.end_headers() self.wfile.write(payload) def do_OPTIONS(self): self.send_response(204) self.send_header("Access-Control-Allow-Origin", "*") self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS") self.send_header("Access-Control-Allow-Headers", "Content-Type") self.end_headers() def do_GET(self): if self.path.startswith("/api/"): self._handle_api_get() elif self.path.rstrip("/") == "/scrapers-status": self._serve_status_page() else: log.debug("GET %s → static file: %s", self.path, self.translate_path(self.path)) super().do_GET() def _handle_api_get(self): if self.path in ("/api/ratings", "/api/ratings/export"): ratings = load_ratings() extra = None if self.path == "/api/ratings/export": extra = {"Content-Disposition": 'attachment; filename="ratings.json"'} log.info("GET %s → %d ratings", self.path, len(ratings)) self._send_json(200, ratings, extra) elif self.path == "/api/status": data = _load_json(DATA_DIR / "status.json") if data is None: self._send_json(404, {"error": "status not available"}) return log.info("GET /api/status → ok") self._send_json(200, data) elif self.path == "/api/status/history": data = _load_json(DATA_DIR / "scraper_history.json", default=[]) if not isinstance(data, list): data = [] log.info("GET /api/status/history → %d entries", len(data)) self._send_json(200, data) else: self._send_json(404, {"error": "not found"}) def _serve_status_page(self): status = _load_json(DATA_DIR / "status.json") history = _load_json(DATA_DIR / "scraper_history.json", default=[]) if not isinstance(history, list): history = [] is_running = (DATA_DIR / "scraper_running.json").exists() html = _render_status_html(status, history, is_running) payload = html.encode("utf-8") self.send_response(200) self.send_header("Content-Type", "text/html; charset=utf-8") self.send_header("Content-Length", str(len(payload))) self.end_headers() self.wfile.write(payload) def do_POST(self): if self.path == "/api/ratings": length = int(self.headers.get("Content-Length", 0)) if length == 0: self._send_json(400, {"error": "empty body"}) return try: raw = self.rfile.read(length) data = json.loads(raw.decode("utf-8")) except Exception as e: log.warning("Bad request body: %s", e) self._send_json(400, {"error": "invalid JSON"}) return if not isinstance(data, dict): self._send_json(400, {"error": "expected JSON object"}) return save_ratings(data) log.info("POST /api/ratings → saved %d ratings", len(data)) self._send_json(200, {"ok": True, "count": len(data)}) else: self._send_json(404, {"error": "not found"}) if __name__ == "__main__": log.info("Server starting on port %d, data dir: %s", PORT, DATA_DIR) handler = functools.partial(Handler, directory=str(DATA_DIR)) server = HTTPServer(("0.0.0.0", PORT), handler) try: server.serve_forever() except KeyboardInterrupt: log.info("Stopped.") sys.exit(0)