Add --data-dir CLI argument to replace symlink-based data persistence
All checks were successful
Build and Push / build (push) Successful in 7s

The Docker entrypoint previously created symlinks from /app/ to /app/data/
so that files the scripts wrote to relative paths would persist on the mounted volume.
This caused symlink loops in production when stale symlinks leaked into the
host data directory.

Instead, all scrapers, merge_and_map.py, regen_map.py, and run_all.sh now
accept a --data-dir argument (default: ".") that controls where data files
are read from and written to. The entrypoint and crontab pass
--data-dir /app/data, eliminating the need for symlinks entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jan Novak
2026-02-15 22:56:41 +01:00
parent a1212c6312
commit a09876d749
12 changed files with 88 additions and 48 deletions

View File

@@ -122,8 +122,8 @@ def load_previous(json_path: str = "byty_psn.json") -> dict[str, str]:
return {}
def scrape(max_pages: int | None = None, max_properties: int | None = None):
previous_first_seen = load_previous()
def scrape(max_pages: int | None = None, max_properties: int | None = None, data_dir: str = "."):
previous_first_seen = load_previous(str(Path(data_dir) / "byty_psn.json"))
logger.info("=" * 60)
logger.info("Stahuji inzeráty z PSN.cz")
logger.info(f"Cena: do {format_price(MAX_PRICE)}")
@@ -346,6 +346,8 @@ if __name__ == "__main__":
help="Maximum number of properties to include in results")
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
parser.add_argument("--data-dir", type=str, default=".",
help="Directory for reading/writing data files (default: current dir)")
args = parser.parse_args()
# Configure logging
@@ -355,11 +357,12 @@ if __name__ == "__main__":
handlers=[logging.StreamHandler()]
)
data_dir = Path(args.data_dir)
start = time.time()
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties, data_dir=args.data_dir)
if estates:
json_path = Path("byty_psn.json")
json_path = data_dir / "byty_psn.json"
json_path.write_text(
json.dumps(estates, ensure_ascii=False, indent=2),
encoding="utf-8",