Add --data-dir CLI argument to replace symlink-based data persistence
All checks were successful
Build and Push / build (push) Successful in 7s

The Docker entrypoint previously created symlinks from /app/ to /app/data/
so that scripts writing relative paths would persist to the mounted volume.
This caused symlink loops in production when stale symlinks leaked into the
host data directory.

Instead, all scrapers, merge_and_map.py, regen_map.py, and run_all.sh now
accept a --data-dir argument (default: ".") that controls where data files
are read from and written to. The entrypoint and crontab pass
--data-dir /app/data, eliminating the need for symlinks entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jan Novak
2026-02-15 22:56:41 +01:00
parent a1212c6312
commit a09876d749
12 changed files with 88 additions and 48 deletions

View File

@@ -7,6 +7,7 @@ PSN a CityHome mají při deduplikaci prioritu (načtou se první).
"""
from __future__ import annotations
+import argparse
import json
import re
from pathlib import Path
@@ -40,7 +41,7 @@ def dedup_key(estate: dict) -> str:
return f"{street}_{price}_{area}"
-def main():
+def main(data_dir: str = "."):
# Definice zdrojů — PSN a CityHome jako první (mají prioritu při deduplikaci)
sources = [
("PSN", "byty_psn.json"),
@@ -51,10 +52,11 @@ def main():
("iDNES", "byty_idnes.json"),
]
+data_path = Path(data_dir)
all_estates = []
for label, filename in sources:
-path = Path(filename)
+path = data_path / filename
if path.exists():
data = json.loads(path.read_text(encoding="utf-8"))
# Ensure source is set (Sreality legacy)
@@ -111,7 +113,7 @@ def main():
print(f" {src}: {count}")
# Save merged data
-merged_path = Path("byty_merged.json")
+merged_path = data_path / "byty_merged.json"
merged_path.write_text(
json.dumps(deduplicated, ensure_ascii=False, indent=2),
encoding="utf-8",
@@ -119,8 +121,12 @@ def main():
print(f"\n✓ Sloučená data: {merged_path.resolve()}")
# Generate map
-generate_map(deduplicated)
+generate_map(deduplicated, output_path=str(data_path / "mapa_bytu.html"))
if __name__ == "__main__":
-main()
+parser = argparse.ArgumentParser(description="Merge scraped data and generate map")
+parser.add_argument("--data-dir", type=str, default=".",
+help="Directory for reading/writing data files (default: current dir)")
+args = parser.parse_args()
+main(data_dir=args.data_dir)