- Rewrite PSN scraper to use /api/units-list endpoint (single API call, no HTML parsing)
- Fix CityHome scraper: GPS from multiple URL patterns, address from table cells, no 404 retries
- Color map markers by price/m² instead of disposition (blue→green→orange→red scale)
- Add persistent rating system (favorite/reject) with Flask ratings server and localStorage fallback
- Rejected markers show original color at reduced opacity with 🚫 SVG overlay
- Favorite markers shown as ⭐ star icons with gold pulse animation
- Add "new today" marker logic (scraped_at == today) with larger pulsing green outline
- Add filter panel with floor, price, hide-rejected controls and ☰/✕ toggle buttons
- Add generate_status.py for scraper run statistics and status.html dashboard
- Add scraped_at field to all scrapers for freshness tracking
- Update run_all.sh with log capture and status generation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
118 lines
4.1 KiB
Bash
Executable File
118 lines
4.1 KiB
Bash
Executable File
#!/usr/bin/env bash
# ============================================================
# Runs every scraper, merges the data and opens the map.
# Usage:            ./run_all.sh
# Or with limits:   ./run_all.sh --max-pages 1 --max-properties 10
# Or with logging:  ./run_all.sh --log-level DEBUG
# ============================================================
set -euo pipefail

# Work from the script's own directory so the relative paths used by this
# script (status.json, the scrape_*.py scripts, ...) resolve no matter where
# it was started from.
cd -- "$(dirname -- "$0")"

# ANSI colour sequences. They are stored as literal backslash escapes and are
# only turned into control characters when printed with `echo -e`.
readonly GREEN='\033[0;32m'
readonly RED='\033[0;31m'
readonly BOLD='\033[1m'
readonly NC='\033[0m'

# Progress bookkeeping: TOTAL is the denominator shown in the "[n/TOTAL]" step
# headers, CURRENT is the step counter, FAILED counts steps that failed.
readonly TOTAL=6
CURRENT=0
FAILED=0

# Start of the run: UTC wall-clock timestamp plus epoch seconds (the latter is
# used to compute the run duration at the end).
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
START_EPOCH=$(date +%s)
LOG_FILE="$(pwd)/scrape_run.log"

# Mark status as running.
# NOTE(review): this happens before the command-line arguments are parsed, so
# `--help` or an invalid argument also overwrites status.json with "running".
echo '{"status":"running"}' > status.json

#######################################
# Print the command-line help: usage line, description, options, examples.
# Arguments: none
# Outputs:   the help text on stdout
#######################################
show_help() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: ./run_all.sh [OPTIONS]

Spustí všechny scrapery, sloučí data a otevře mapu.

Options:
 --max-pages N Maximální počet stránek ke stažení z každého zdroje
 --max-properties N Maximální počet nemovitostí ke stažení z každého zdroje
 --log-level LEVEL Úroveň logování (DEBUG, INFO, WARNING, ERROR)
 -h, --help Zobrazí tuto nápovědu

Examples:
 ./run_all.sh # plný běh
 ./run_all.sh --max-pages 1 --max-properties 10 # rychlý test
 ./run_all.sh --log-level DEBUG # s debug logováním
EOF
}

# Parse arguments
#
# SCRAPER_ARGS accumulates the flags that are forwarded to the scrapers, as a
# single space-separated string (each accepted flag is appended as " FLAG VALUE").
SCRAPER_ARGS=""

#######################################
# Parse the script's command-line options.
# Globals:   SCRAPER_ARGS (appended to)
# Arguments: the script's positional parameters
# Outputs:   help text / error message on stdout
# Exits:     0 after -h/--help; 1 on an unknown argument or when a
#            value-taking option is given without its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      --max-pages|--max-properties|--log-level)
        # Without this guard a trailing flag with no value would hit an
        # unbound "$2" and abort with a cryptic shell error under `set -u`.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value"
          echo ""
          show_help
          exit 1
        fi
        SCRAPER_ARGS="$SCRAPER_ARGS $1 $2"
        shift 2
        ;;
      *)
        echo "Unknown argument: $1"
        echo ""
        show_help
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

#######################################
# Announce the next pipeline step.
# Globals:   CURRENT (incremented), TOTAL, BOLD, NC (read)
# Arguments: $1 - step title
# Outputs:   a blank line, a bold "[CURRENT/TOTAL] title" header and a
#            dashed rule, all on stdout
#######################################
step() {
  local title=${1:-}
  CURRENT=$((CURRENT + 1))
  # %b expands the backslash escapes stored in the colour variables; the title
  # itself goes through %s so it is always printed literally.
  printf '\n%b[%d/%d] %s%b\n' "$BOLD" "$CURRENT" "$TOTAL" "$title" "$NC"
  printf '%s\n' "------------------------------------------------------------"
}

# ── Scrapers (in parallel where possible) ───────────────────
# Tee all output to log file for status generation
exec > >(tee -a "$LOG_FILE") 2>&1

#######################################
# Run one scraper in the foreground; on failure print a red error line and
# bump the failure counter instead of aborting the whole run.
# Globals:   SCRAPER_ARGS, RED, NC (read), FAILED (incremented on failure)
# Arguments: $1 - label used in the error message
#            $2 - Python script to run
#######################################
run_scraper() {
  local label=$1 script=$2
  # SCRAPER_ARGS is a space-separated flag string, so word splitting is intended.
  # shellcheck disable=SC2086
  python3 "$script" $SCRAPER_ARGS \
    || { echo -e "${RED}✗ ${label} selhalo${NC}"; FAILED=$((FAILED + 1)); }
}

step "Sreality"
run_scraper "Sreality" scrape_and_map.py

step "Realingo"
run_scraper "Realingo" scrape_realingo.py

step "Bezrealitky"
run_scraper "Bezrealitky" scrape_bezrealitky.py

step "iDNES Reality"
run_scraper "iDNES" scrape_idnes.py

# These two scrapers are started in the background and run concurrently; each
# is then reaped with `wait` so its exit status can be checked individually.
step "PSN + CityHome"
# shellcheck disable=SC2086
python3 scrape_psn.py $SCRAPER_ARGS &
PID_PSN=$!
# shellcheck disable=SC2086
python3 scrape_cityhome.py $SCRAPER_ARGS &
PID_CH=$!
wait "$PID_PSN" || { echo -e "${RED}✗ PSN selhalo${NC}"; FAILED=$((FAILED + 1)); }
wait "$PID_CH" || { echo -e "${RED}✗ CityHome selhalo${NC}"; FAILED=$((FAILED + 1)); }

# ── Merge + map ──────────────────────────────────────────────

step "Sloučení dat a generování mapy"
# A failed merge is reported and counted, but does not abort the run.
if ! python3 merge_and_map.py; then
  echo -e "${RED}✗ Merge selhal${NC}"
  FAILED=$((FAILED + 1))
fi

# ── Opening the map ──────────────────────────────────────────
# (The map is opened at the very end of the script, after the summary.)

# ── Status generation ────────────────────────────────────────

END_EPOCH=$(date +%s)
DURATION=$((END_EPOCH - START_EPOCH))
# Guarded like every other Python step: under `set -e` an unguarded failure
# here would abort the script before the summary is printed and the map opened.
python3 generate_status.py "$START_TIME" "$DURATION" "$LOG_FILE" \
  || { echo -e "${RED}✗ Generování statusu selhalo${NC}"; FAILED=$((FAILED + 1)); }

# ── Summary ──────────────────────────────────────────────────
echo ""
echo "============================================================"
if (( FAILED == 0 )); then
  echo -e "${GREEN}${BOLD}Hotovo! Všech 6 zdrojů úspěšně staženo.${NC}"
else
  echo -e "${RED}${BOLD}Hotovo s $FAILED chybami.${NC}"
fi
echo "============================================================"

# Open the generated map when an `open` command is available. This is
# best-effort: a failure to open the file must not fail the script.
if command -v open &>/dev/null; then
  open mapa_bytu.html || true
fi