Files
maru-hleda-byt/run_all.sh
Jan Novak a09876d749
All checks were successful
Build and Push / build (push) Successful in 7s
Add --data-dir CLI argument to replace symlink-based data persistence
The Docker entrypoint previously created symlinks from /app/ to /app/data/
so that scripts writing relative paths would persist to the mounted volume.
This caused symlink loops in production when stale symlinks leaked into the
host data directory.

Instead, all scrapers, merge_and_map.py, regen_map.py, and run_all.sh now
accept a --data-dir argument (default: ".") that controls where data files
are read from and written to. The entrypoint and crontab pass
--data-dir /app/data, eliminating the need for symlinks entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 22:56:41 +01:00

114 lines
3.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# ============================================================
# Spustí všechny scrapery, sloučí data a otevře mapu.
# Použití: ./run_all.sh
# Nebo s limity: ./run_all.sh --max-pages 1 --max-properties 10
# Nebo s logováním: ./run_all.sh --log-level DEBUG
# S vlastním adresářem: ./run_all.sh --data-dir /app/data
# ============================================================
# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Work from the directory that contains this script, so the scraper scripts
# invoked by relative name further down are found.
cd "$(dirname "$0")"

# ANSI colour sequences. They are stored with a literal backslash and are only
# turned into escape characters when printed with `echo -e`.
GREEN='\033[0;32m'
RED='\033[0;31m'
BOLD='\033[1m'
NC='\033[0m'

# Progress bookkeeping: number of announced steps, the step currently running,
# and how many steps have reported a failure so far.
TOTAL=6 CURRENT=0 FAILED=0
#######################################
# Print the usage text (options and examples) for this script.
# Arguments: none
# Outputs:   the help text on stdout, one entry per line
# Returns:   does not exit; the caller decides the exit status
#######################################
show_help() {
  # A single printf with one argument per output line; empty arguments
  # produce the blank separator lines.
  printf '%s\n' \
    "Usage: ./run_all.sh [OPTIONS]" \
    "" \
    "Spustí všechny scrapery, sloučí data a otevře mapu." \
    "" \
    "Options:" \
    " --max-pages N Maximální počet stránek ke stažení z každého zdroje" \
    " --max-properties N Maximální počet nemovitostí ke stažení z každého zdroje" \
    " --log-level LEVEL Úroveň logování (DEBUG, INFO, WARNING, ERROR)" \
    " --data-dir DIR Adresář pro čtení/zápis datových souborů (default: .)" \
    " -h, --help Zobrazí tuto nápovědu" \
    "" \
    "Examples:" \
    " ./run_all.sh # plný běh" \
    " ./run_all.sh --max-pages 1 --max-properties 10 # rychlý test" \
    " ./run_all.sh --log-level DEBUG # s debug logováním" \
    " ./run_all.sh --data-dir /app/data # Docker produkce"
}
# ── Argument parsing ─────────────────────────────────────────

#######################################
# Parse the command line into DATA_DIR and SCRAPER_ARGS.
#
# SCRAPER_ARGS is an array, not a space-joined string, so every option and
# value reaches the scrapers as exactly one word even when a value (for
# example the data directory) contains spaces or glob characters.
#
# Globals:   DATA_DIR (written)     - directory handed to every tool via
#                                     --data-dir; defaults to "."
#            SCRAPER_ARGS (written) - options forwarded to each scraper,
#                                     always ending with --data-dir DATA_DIR
# Arguments: the script's command-line arguments
# Outputs:   usage/error text on stdout for --help and invalid input
# Returns:   exits 0 after --help; exits 1 on an unknown argument or when a
#            value-taking option is given without its value
#######################################
parse_args() {
  SCRAPER_ARGS=()
  DATA_DIR="."
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      --max-pages|--max-properties|--log-level|--data-dir)
        # Every option in this arm takes a value. Report a missing one as a
        # usage error instead of dying on "$2: unbound variable" (set -u).
        if [[ $# -lt 2 ]]; then
          echo "Missing value for argument: $1"
          echo ""
          show_help
          exit 1
        fi
        if [[ "$1" == "--data-dir" ]]; then
          DATA_DIR="$2"
        else
          SCRAPER_ARGS+=("$1" "$2")
        fi
        shift 2
        ;;
      *)
        echo "Unknown argument: $1"
        echo ""
        show_help
        exit 1
        ;;
    esac
  done
  # The data directory is forwarded last, after any pass-through options.
  SCRAPER_ARGS+=(--data-dir "$DATA_DIR")
}
parse_args "$@"

#######################################
# Announce the next pipeline step with a "[current/total] name" header.
# Globals:   CURRENT (incremented), TOTAL, BOLD, NC (read)
# Arguments: $1 - step name shown in the header
# Outputs:   a blank line, the header and a separator line on stdout
#######################################
step() {
  CURRENT=$((CURRENT + 1))
  echo ""
  echo -e "${BOLD}[$CURRENT/$TOTAL] $1${NC}"
  echo "------------------------------------------------------------"
}

#######################################
# Record a failed step without aborting the whole run.
# Globals:   FAILED (incremented), RED, NC (read)
# Arguments: $1 - failure message printed after the cross mark
# Outputs:   the message in red on stdout
#######################################
mark_failed() {
  echo -e "${RED}✗ $1${NC}"
  FAILED=$((FAILED + 1))
}

# ── Scrapers ─────────────────────────────────────────────────
# The first four scrapers run one after another. A failure is counted and the
# run continues with the next source.
step "Sreality"
python3 scrape_and_map.py "${SCRAPER_ARGS[@]}" || mark_failed "Sreality selhalo"

step "Realingo"
python3 scrape_realingo.py "${SCRAPER_ARGS[@]}" || mark_failed "Realingo selhalo"

step "Bezrealitky"
python3 scrape_bezrealitky.py "${SCRAPER_ARGS[@]}" || mark_failed "Bezrealitky selhalo"

step "iDNES Reality"
python3 scrape_idnes.py "${SCRAPER_ARGS[@]}" || mark_failed "iDNES selhalo"

# PSN and CityHome are started together as background jobs and share one step.
# `set -e` does not cover background jobs, so each exit status is collected
# explicitly through `wait`.
step "PSN + CityHome"
python3 scrape_psn.py "${SCRAPER_ARGS[@]}" &
PID_PSN=$!
python3 scrape_cityhome.py "${SCRAPER_ARGS[@]}" &
PID_CH=$!
wait "$PID_PSN" || mark_failed "PSN selhalo"
wait "$PID_CH" || mark_failed "CityHome selhalo"

# ── Merge + map ──────────────────────────────────────────────
step "Sloučení dat a generování mapy"
python3 merge_and_map.py --data-dir "$DATA_DIR" || mark_failed "Merge selhal"

# ── Summary and opening the map ──────────────────────────────
echo ""
echo "============================================================"
if [[ "$FAILED" -eq 0 ]]; then
  echo -e "${GREEN}${BOLD}Hotovo! Všech 6 zdrojů úspěšně staženo.${NC}"
else
  echo -e "${RED}${BOLD}Hotovo s $FAILED chybami.${NC}"
fi
echo "============================================================"

# Open the generated map when an `open` command is available. This is
# best-effort: a failure to open the file must not fail the run.
# NOTE(review): the script still exits 0 when steps have failed, as before;
# confirm whether callers should be able to detect failures via exit status.
if command -v open &>/dev/null; then
  open "$DATA_DIR/mapa_bytu.html" || true
fi