Add --data-dir CLI argument to replace symlink-based data persistence
All checks were successful
Build and Push / build (push) Successful in 7s
All checks were successful
Build and Push / build (push) Successful in 7s
The Docker entrypoint previously created symlinks from /app/ to /app/data/ so that files the scripts wrote to relative paths would persist on the mounted volume. This caused symlink loops in production when stale symlinks leaked into the host data directory.

To replace that mechanism, all scrapers, merge_and_map.py, regen_map.py, and run_all.sh now accept a --data-dir argument (default: ".") that controls where data files are read from and written to. The entrypoint and crontab pass --data-dir /app/data, eliminating the need for symlinks entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ PSN a CityHome mají při deduplikaci prioritu (načtou se první).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
@@ -40,7 +41,7 @@ def dedup_key(estate: dict) -> str:
|
||||
return f"{street}_{price}_{area}"
|
||||
|
||||
|
||||
def main():
|
||||
def main(data_dir: str = "."):
|
||||
# Definice zdrojů — PSN a CityHome jako první (mají prioritu při deduplikaci)
|
||||
sources = [
|
||||
("PSN", "byty_psn.json"),
|
||||
@@ -51,10 +52,11 @@ def main():
|
||||
("iDNES", "byty_idnes.json"),
|
||||
]
|
||||
|
||||
data_path = Path(data_dir)
|
||||
all_estates = []
|
||||
|
||||
for label, filename in sources:
|
||||
path = Path(filename)
|
||||
path = data_path / filename
|
||||
if path.exists():
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
# Ensure source is set (Sreality legacy)
|
||||
@@ -111,7 +113,7 @@ def main():
|
||||
print(f" {src}: {count}")
|
||||
|
||||
# Save merged data
|
||||
merged_path = Path("byty_merged.json")
|
||||
merged_path = data_path / "byty_merged.json"
|
||||
merged_path.write_text(
|
||||
json.dumps(deduplicated, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
@@ -119,8 +121,12 @@ def main():
|
||||
print(f"\n✓ Sloučená data: {merged_path.resolve()}")
|
||||
|
||||
# Generate map
|
||||
generate_map(deduplicated)
|
||||
generate_map(deduplicated, output_path=str(data_path / "mapa_bytu.html"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
parser = argparse.ArgumentParser(description="Merge scraped data and generate map")
|
||||
parser.add_argument("--data-dir", type=str, default=".",
|
||||
help="Directory for reading/writing data files (default: current dir)")
|
||||
args = parser.parse_args()
|
||||
main(data_dir=args.data_dir)
|
||||
|
||||
Reference in New Issue
Block a user