Add status dashboard, server, scraper stats, and DATA_DIR support
All checks were successful
Build and Push / build (push) Successful in 7s
Key changes:
- Replace ratings_server.py + status.html with a unified server.py that
serves the map, scraper status dashboard, and ratings API in one process
- Add scraper_stats.py utility: each scraper writes per-run stats (fetched,
accepted, excluded, duration) to stats_<source>.json for the status page
- generate_status.py: respect DATA_DIR env var so status.json lands in the
configured data directory instead of always the project root
- run_all.sh: replace the {"status":"running"} overwrite of status.json with
a dedicated scraper_running.json lock file; trap on EXIT ensures cleanup
even on kill/error, preventing the previous run's results from being wiped
- server.py: detect running state via scraper_running.json existence instead
of status["status"] field, eliminating the dual-use race condition
- Makefile: add serve (local dev), debug (Docker debug container) targets;
add SERVER_PORT variable
- build/Dockerfile + entrypoint.sh: switch to server.py, set DATA_DIR,
adjust volume mounts
- .gitignore: add *.json and *.log to keep runtime data files out of VCS
- mapa_bytu.html: price-per-m² colouring, status link, UX tweaks
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
run_all.sh (15 lines changed)
@@ -20,8 +20,10 @@ START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
|
||||
START_EPOCH=$(date +%s)
|
||||
LOG_FILE="$(pwd)/scrape_run.log"
|
||||
|
||||
# Mark status as running
|
||||
echo '{"status":"running"}' > status.json
|
||||
# Mark scraper as running; cleaned up on exit (even on error/kill).
# The lock file lives in DATA_DIR when set, otherwise the current directory,
# so the status server (server.py) can detect a run in progress by existence
# alone — it never has to inspect status.json.
LOCK_FILE="${DATA_DIR:-.}/scraper_running.json"
# printf with a %s placeholder instead of quote-gluing an interpolated echo:
# same bytes on disk, but the JSON template stays readable and safe even if
# the payload ever starts with a dash. START_TIME comes from `date`, so it
# contains no characters needing JSON escaping.
printf '{"running":true,"started_at":"%s"}\n' "$START_TIME" > "$LOCK_FILE"
# EXIT trap fires on normal exit, `set -e` aborts, and TERM/INT via bash's
# trap semantics — the previous run's status.json is never clobbered.
trap 'rm -f "$LOCK_FILE"' EXIT
|
||||
|
||||
show_help() {
|
||||
echo "Usage: ./run_all.sh [OPTIONS]"
|
||||
@@ -32,16 +34,19 @@ show_help() {
|
||||
echo " --max-pages N Maximální počet stránek ke stažení z každého zdroje"
|
||||
echo " --max-properties N Maximální počet nemovitostí ke stažení z každého zdroje"
|
||||
echo " --log-level LEVEL Úroveň logování (DEBUG, INFO, WARNING, ERROR)"
|
||||
echo " --keep N Počet běhů v historii (výchozí: 5, 0=neomezeno)"
|
||||
echo " -h, --help Zobrazí tuto nápovědu"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " ./run_all.sh # plný běh"
|
||||
echo " ./run_all.sh --max-pages 1 --max-properties 10 # rychlý test"
|
||||
echo " ./run_all.sh --log-level DEBUG # s debug logováním"
|
||||
echo " ./run_all.sh --keep 10 # uchovej 10 běhů v historii"
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
SCRAPER_ARGS=""
|
||||
KEEP_ARG=""
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-h|--help)
|
||||
@@ -52,6 +57,10 @@ while [[ $# -gt 0 ]]; do
|
||||
SCRAPER_ARGS="$SCRAPER_ARGS $1 $2"
|
||||
shift 2
|
||||
;;
|
||||
--keep)
|
||||
KEEP_ARG="--keep $2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown argument: $1"
|
||||
echo ""
|
||||
@@ -103,7 +112,7 @@ python3 merge_and_map.py || { echo -e "${RED}✗ Merge selhal${NC}"; FAILED=$((F
|
||||
|
||||
END_EPOCH=$(date +%s)
|
||||
DURATION=$((END_EPOCH - START_EPOCH))
|
||||
python3 generate_status.py "$START_TIME" "$DURATION" "$LOG_FILE"
|
||||
python3 generate_status.py --start-time "$START_TIME" --duration "$DURATION" $KEEP_ARG
|
||||
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
|
||||
Reference in New Issue
Block a user