Add status dashboard, server, scraper stats, and DATA_DIR support
All checks were successful
Build and Push / build (push) Successful in 7s
All checks were successful
Build and Push / build (push) Successful in 7s
Key changes:
- Replace ratings_server.py + status.html with a unified server.py that
serves the map, scraper status dashboard, and ratings API in one process
- Add scraper_stats.py utility: each scraper writes per-run stats (fetched,
accepted, excluded, duration) to stats_<source>.json for the status page
- generate_status.py: respect DATA_DIR env var so status.json lands in the
configured data directory instead of always the project root
- run_all.sh: stop overwriting status.json with {"status":"running"} and use
a dedicated scraper_running.json lock file instead, so the previous run's
results are no longer wiped while a new run is in progress; a trap on EXIT
ensures cleanup even on kill/error
- server.py: detect the running state from the existence of scraper_running.json
instead of the status["status"] field, eliminating the dual-use race condition
- Makefile: add serve (local dev) and debug (Docker debug container) targets;
add SERVER_PORT variable
- build/Dockerfile + entrypoint.sh: switch to server.py, set DATA_DIR,
adjust volume mounts
- .gitignore: add *.json and *.log to keep runtime data files out of VCS
- mapa_bytu.html: price-per-m² colouring, status link, UX tweaks
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,9 @@ import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlencode
|
||||
from scraper_stats import write_stats
|
||||
|
||||
STATS_FILE = "stats_psn.json"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -67,6 +70,8 @@ def format_price(price: int) -> str:
|
||||
|
||||
|
||||
def scrape(max_properties: int | None = None):
|
||||
_run_start = time.time()
|
||||
_run_ts = datetime.now().isoformat(timespec="seconds")
|
||||
logger.info("=" * 60)
|
||||
logger.info("Stahuji inzeráty z PSN.cz")
|
||||
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
|
||||
@@ -93,6 +98,15 @@ def scrape(max_properties: int | None = None):
|
||||
data = fetch_json(url)
|
||||
except Exception as e:
|
||||
logger.error(f"Chyba při stahování: {e}", exc_info=True)
|
||||
write_stats(STATS_FILE, {
|
||||
"source": "PSN",
|
||||
"timestamp": _run_ts,
|
||||
"duration_sec": round(time.time() - _run_start, 1),
|
||||
"success": False,
|
||||
"accepted": 0,
|
||||
"fetched": 0,
|
||||
"error": str(e),
|
||||
})
|
||||
return []
|
||||
|
||||
all_units = data.get("units", {}).get("data", [])
|
||||
@@ -241,6 +255,15 @@ def scrape(max_properties: int | None = None):
|
||||
logger.info(f" ✓ Vyhovující byty: {len(results)}")
|
||||
logger.info(f"{'=' * 60}")
|
||||
|
||||
write_stats(STATS_FILE, {
|
||||
"source": "PSN",
|
||||
"timestamp": _run_ts,
|
||||
"duration_sec": round(time.time() - _run_start, 1),
|
||||
"success": True,
|
||||
"accepted": len(results),
|
||||
"fetched": len(all_units),
|
||||
"excluded": excluded,
|
||||
})
|
||||
return results
|
||||
|
||||
|
||||
@@ -259,8 +282,22 @@ if __name__ == "__main__":
|
||||
handlers=[logging.StreamHandler()]
|
||||
)
|
||||
|
||||
_run_ts = datetime.now().isoformat(timespec="seconds")
|
||||
start = time.time()
|
||||
estates = scrape(max_properties=args.max_properties)
|
||||
try:
|
||||
estates = scrape(max_properties=args.max_properties)
|
||||
except Exception as e:
|
||||
logger.error(f"Scraper failed: {e}", exc_info=True)
|
||||
write_stats(STATS_FILE, {
|
||||
"source": "PSN",
|
||||
"timestamp": _run_ts,
|
||||
"duration_sec": round(time.time() - start, 1),
|
||||
"success": False,
|
||||
"accepted": 0,
|
||||
"fetched": 0,
|
||||
"error": str(e),
|
||||
})
|
||||
raise
|
||||
|
||||
if estates:
|
||||
json_path = Path("byty_psn.json")
|
||||
|
||||
Reference in New Issue
Block a user