Add validation mode, structured logging, and CLI args to all scrapers
- Replace print() with Python logging module across all 6 scrapers for configurable log levels (DEBUG/INFO/WARNING/ERROR)
- Add --max-pages, --max-properties, and --log-level CLI arguments to each scraper via argparse for limiting scrape scope
- Add validation Make targets (validation, validation-local, validation-local-debug) for quick test runs with limited data
- Update run_all.sh to parse and forward CLI args to all scrapers
- Update mapa_bytu.html with latest scrape results

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
55
Makefile
55
Makefile
@@ -1,20 +1,26 @@
|
||||
# NOTE(review): the four assignments directly below are repeated further down
# together with the VALIDATION_* names — presumably the before/after sides of
# the diff hunk; confirm that only one set exists in the real Makefile.
IMAGE_NAME := maru-hleda-byt
|
||||
CONTAINER_NAME := maru-hleda-byt
|
||||
VOLUME_NAME := maru-hleda-byt-data
|
||||
PORT := 8080
|
||||
# Docker image tag: written by `build`, and the image started by `validation`.
IMAGE_NAME := maru-hleda-byt
|
||||
# Container that `scrape` runs run_all.sh in via `docker exec`.
CONTAINER_NAME := maru-hleda-byt
|
||||
# NOTE(review): no use of VOLUME_NAME is visible in this excerpt — presumably
# the data volume mounted by `run`; confirm.
VOLUME_NAME := maru-hleda-byt-data
|
||||
# Name of the separate container managed by `validation` / `validation-stop`.
VALIDATION_CONTAINER := maru-hleda-byt-validation
|
||||
# Volume mounted at /app/data inside the validation container.
VALIDATION_VOLUME := maru-hleda-byt-validation-data
|
||||
# NOTE(review): no use of PORT is visible in this excerpt; `validation`
# hard-codes its own 8081:8080 mapping instead of referencing it.
PORT := 8080
|
||||
|
||||
# Mark every command-style target as phony so that a file with the same name
# can never make the target look up to date.
# NOTE(review): two .PHONY lines appear here and the second is a superset of
# the first — presumably the before/after sides of the diff hunk; confirm.
.PHONY: build run stop logs scrape restart clean help
|
||||
.PHONY: build run stop logs scrape restart clean help validation validation-local validation-stop validation-local-debug
|
||||
|
||||
# Print a one-line description of every user-facing target.
# The `validation-stop` entry says "Stop and remove" because that target runs
# both `docker stop` and `docker rm`, matching the wording used for `stop`.
# NOTE(review): the target list below appears twice (a shorter list, then a
# longer one that adds the validation targets) — presumably the before/after
# sides of the diff hunk; confirm only the longer list is in the real Makefile.
help:
|
||||
@echo "Available targets:"
|
||||
@echo " build - Build the Docker image"
|
||||
@echo " run - Build and run the Docker container in the background"
|
||||
@echo " stop - Stop and remove the running container"
|
||||
@echo " logs - Show live container logs"
|
||||
@echo " scrape - Run the scraping script inside the container"
|
||||
@echo " restart - Restart the container (stop and run again)"
|
||||
@echo " clean - Stop container and remove the Docker image"
|
||||
@echo " help - Show this help message"
|
||||
@echo " build - Build the Docker image"
|
||||
@echo " run - Build and run the Docker container in the background"
|
||||
@echo " stop - Stop and remove the running container"
|
||||
@echo " logs - Show live container logs"
|
||||
@echo " scrape - Run the scraping script inside the container"
|
||||
@echo " validation - Run scraping with limits (1 page, 10 properties) in Docker container"
|
||||
@echo " validation-stop - Stop and remove the validation Docker container"
|
||||
@echo " validation-local - Run scraping with limits (1 page, 10 properties) locally with Python"
|
||||
@echo " validation-local-debug - Run validation locally with DEBUG logging"
|
||||
@echo " restart - Restart the container (stop and run again)"
|
||||
@echo " clean - Stop container and remove the Docker image"
|
||||
@echo " help - Show this help message"
|
||||
|
||||
# Build the Docker image tagged $(IMAGE_NAME) from build/Dockerfile, using the
# current directory as the build context.
build:
|
||||
docker build -f build/Dockerfile -t $(IMAGE_NAME) .
|
||||
@@ -36,6 +42,29 @@ logs:
|
||||
# Run /app/run_all.sh with bash inside the container named $(CONTAINER_NAME).
# Uses `docker exec`, so that container has to exist and be running already;
# no scraper arguments are passed here.
scrape:
|
||||
docker exec $(CONTAINER_NAME) bash /app/run_all.sh
|
||||
|
||||
# Start a dedicated validation container and run a limited scrape inside it.
# Steps, in order:
#   1. `build` prerequisite produces the image.
#   2. Any existing $(VALIDATION_CONTAINER) is stopped and removed; errors are
#      discarded so a missing container does not fail the target.
#   3. A new detached container is started from $(IMAGE_NAME), publishing host
#      port 8081 to container port 8080 and mounting $(VALIDATION_VOLUME) at
#      /app/data.
#   4. After a 2-second pause, /app/run_all.sh runs in the container with
#      --max-pages 1 --max-properties 10.
#   5. The URL of the resulting map on host port 8081 is printed.
# NOTE(review): the fixed `sleep 2` presumably gives the container time to
# finish starting before `docker exec` — confirm what it is waiting for.
# NOTE(review): `--restart unless-stopped` keeps this container running until
# `validation-stop` is invoked — confirm that is intended for a validation run.
validation: build
|
||||
@docker stop $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||
@docker rm $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||
docker run -d --name $(VALIDATION_CONTAINER) \
|
||||
-p 8081:8080 \
|
||||
-v $(VALIDATION_VOLUME):/app/data \
|
||||
--restart unless-stopped \
|
||||
$(IMAGE_NAME)
|
||||
@sleep 2
|
||||
docker exec $(VALIDATION_CONTAINER) bash /app/run_all.sh --max-pages 1 --max-properties 10
|
||||
@echo "Validation map will be at http://localhost:8081/mapa_bytu.html"
|
||||
|
||||
# Stop and remove the container named $(VALIDATION_CONTAINER).
# Both docker commands discard stderr and are followed by `|| true`, so the
# target succeeds (and still prints the message) when no such container exists.
# The $(VALIDATION_VOLUME) volume is not removed here.
validation-stop:
|
||||
@docker stop $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||
@docker rm $(VALIDATION_CONTAINER) 2>/dev/null || true
|
||||
@echo "Validation container stopped and removed"
|
||||
|
||||
# Run ./run_all.sh directly on the host (no Docker) with the same limits as
# `validation`: --max-pages 1 --max-properties 10.
# NOTE(review): the script is executed directly, so it presumably needs the
# executable bit set; the Docker targets invoke it through `bash` instead.
validation-local:
|
||||
./run_all.sh --max-pages 1 --max-properties 10
|
||||
|
||||
# Same invocation as `validation-local`, with --log-level DEBUG added.
validation-local-debug:
|
||||
./run_all.sh --max-pages 1 --max-properties 10 --log-level DEBUG
|
||||
|
||||
# Restart the container: stop it, then start it again.
# The two steps run as sequential sub-makes rather than as prerequisites
# (`restart: stop run`), because prerequisites of a single target may execute
# concurrently under `make -j`, which could start the new container before the
# old one has been stopped. $(MAKE) is used so flags and the jobserver are
# passed down to the sub-makes.
restart:
	$(MAKE) stop
	$(MAKE) run
|
||||
|
||||
clean: stop
|
||||
|
||||
Reference in New Issue
Block a user