Add validation mode, structured logging, and CLI args to all scrapers
- Replace print() with the Python logging module across all 6 scrapers for configurable log levels (DEBUG/INFO/WARNING/ERROR)
- Add --max-pages, --max-properties, and --log-level CLI arguments to each scraper via argparse for limiting scrape scope
- Add validation Make targets (validation, validation-local, validation-local-debug) for quick test runs with limited data
- Update run_all.sh to parse and forward CLI args to all scrapers
- Update mapa_bytu.html with latest scrape results

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
52
run_all.sh
52
run_all.sh
@@ -2,6 +2,8 @@
|
||||
# ============================================================
|
||||
# Spustí všechny scrapery, sloučí data a otevře mapu.
|
||||
# Použití: ./run_all.sh
|
||||
# Nebo s limity: ./run_all.sh --max-pages 1 --max-properties 10
|
||||
# Nebo s logováním: ./run_all.sh --log-level DEBUG
|
||||
# ============================================================
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")"
|
||||
@@ -15,6 +17,44 @@ TOTAL=6
|
||||
CURRENT=0
|
||||
FAILED=0
|
||||
|
||||
#######################################
# Print the usage text for run_all.sh.
# Arguments: none
# Outputs:   the help text on stdout, one line per printf argument
#######################################
show_help() {
  # A single printf call emits every line; '%s\n' is reused for each
  # argument, so an empty argument produces a blank separator line.
  printf '%s\n' \
    'Usage: ./run_all.sh [OPTIONS]' \
    '' \
    'Spustí všechny scrapery, sloučí data a otevře mapu.' \
    '' \
    'Options:' \
    ' --max-pages N Maximální počet stránek ke stažení z každého zdroje' \
    ' --max-properties N Maximální počet nemovitostí ke stažení z každého zdroje' \
    ' --log-level LEVEL Úroveň logování (DEBUG, INFO, WARNING, ERROR)' \
    ' -h, --help Zobrazí tuto nápovědu' \
    '' \
    'Examples:' \
    ' ./run_all.sh # plný běh' \
    ' ./run_all.sh --max-pages 1 --max-properties 10 # rychlý test' \
    ' ./run_all.sh --log-level DEBUG # s debug logováním'
}
|
||||
|
||||
#######################################
# Parse the command-line arguments of run_all.sh.
# Globals:
#   SCRAPER_ARGS (written) - space-joined "--option value" pairs; it is
#                            expanded unquoted on each scraper command
#                            line, so values must not contain whitespace
#                            or glob characters.
# Arguments: the script's command-line arguments ("$@")
# Outputs:   help text on stdout for -h/--help; diagnostics on stderr
# Exits:     0 after printing help; 1 on an unknown argument or on an
#            option whose value is missing or empty
#######################################
parse_args() {
  SCRAPER_ARGS=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      --max-pages|--max-properties|--log-level)
        # Without this guard a trailing option (e.g. "--max-pages" as the
        # last argument) aborts with "$2: unbound variable" under set -u,
        # and an empty value would vanish when SCRAPER_ARGS is word-split,
        # shifting every following option by one position.
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "Missing value for $1" >&2
          echo "" >&2
          show_help >&2
          exit 1
        fi
        SCRAPER_ARGS="$SCRAPER_ARGS $1 $2"
        shift 2
        ;;
      *)
        echo "Unknown argument: $1" >&2
        echo "" >&2
        show_help >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Every argument has been consumed by now; clear the positional parameters
# so nothing further down re-reads them.
set --
|
||||
|
||||
step() {
|
||||
CURRENT=$((CURRENT + 1))
|
||||
echo ""
|
||||
@@ -25,21 +65,21 @@ step() {
|
||||
# ── Scrapers (in parallel where possible) ─────────────────────────
# Each scraper is launched through python3. A non-zero exit does not abort
# the script despite `set -e`, because the command sits on the left of `||`;
# the handler prints a red failure line and increments FAILED instead.
#
# $SCRAPER_ARGS is expanded unquoted on purpose: it is a space-joined
# string that has to word-split into separate argv entries.
#
# NOTE(review): every scraper command below appears twice, first without
# and then with $SCRAPER_ARGS. This looks like the removed/added pair of a
# diff view; confirm that only the second form belongs in the script.
|
||||
|
||||
step "Sreality"
|
||||
python3 scrape_and_map.py || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
python3 scrape_and_map.py $SCRAPER_ARGS || { echo -e "${RED}✗ Sreality selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
|
||||
step "Realingo"
|
||||
python3 scrape_realingo.py || { echo -e "${RED}✗ Realingo selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
python3 scrape_realingo.py $SCRAPER_ARGS || { echo -e "${RED}✗ Realingo selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
|
||||
step "Bezrealitky"
|
||||
python3 scrape_bezrealitky.py || { echo -e "${RED}✗ Bezrealitky selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
python3 scrape_bezrealitky.py $SCRAPER_ARGS || { echo -e "${RED}✗ Bezrealitky selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
|
||||
step "iDNES Reality"
|
||||
python3 scrape_idnes.py || { echo -e "${RED}✗ iDNES selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
python3 scrape_idnes.py $SCRAPER_ARGS || { echo -e "${RED}✗ iDNES selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
|
||||
# PSN and CityHome are started as background jobs under a single step;
# their PIDs are captured right after each launch so both can be waited on.
step "PSN + CityHome"
|
||||
python3 scrape_psn.py &
|
||||
python3 scrape_psn.py $SCRAPER_ARGS &
|
||||
PID_PSN=$!
|
||||
python3 scrape_cityhome.py &
|
||||
python3 scrape_cityhome.py $SCRAPER_ARGS &
|
||||
PID_CH=$!
|
||||
# `wait <pid>` returns that job's exit status, so each background scraper
# is reported and counted in FAILED separately.
wait $PID_PSN || { echo -e "${RED}✗ PSN selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
wait $PID_CH || { echo -e "${RED}✗ CityHome selhalo${NC}"; FAILED=$((FAILED + 1)); }
|
||||
|
||||
Reference in New Issue
Block a user