Add --data-dir CLI argument to replace symlink-based data persistence
All checks were successful
Build and Push / build (push) Successful in 7s

The Docker entrypoint previously created symlinks from /app/ to /app/data/
so that files the scripts wrote to relative paths would persist on the mounted volume.
This caused symlink loops in production when stale symlinks leaked into the
host data directory.

Instead, all scrapers, merge_and_map.py, regen_map.py, and run_all.sh now
accept a --data-dir argument (default: ".") that controls where data files
are read from and written to. The entrypoint and crontab pass
--data-dir /app/data, eliminating the need for symlinks entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jan Novak
2026-02-15 22:56:41 +01:00
parent a1212c6312
commit a09876d749
12 changed files with 88 additions and 48 deletions

View File

@@ -170,8 +170,8 @@ def load_cache(json_path: str = "byty_bezrealitky.json") -> dict[int, dict]:
return {}
def scrape(max_pages: int | None = None, max_properties: int | None = None):
cache = load_cache()
def scrape(max_pages: int | None = None, max_properties: int | None = None, data_dir: str = "."):
cache = load_cache(str(Path(data_dir) / "byty_bezrealitky.json"))
logger.info("=" * 60)
logger.info("Stahuji inzeráty z Bezrealitky.cz")
@@ -395,6 +395,8 @@ if __name__ == "__main__":
help="Maximum number of properties to fetch details for")
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging level (default: INFO)")
parser.add_argument("--data-dir", type=str, default=".",
help="Directory for reading/writing data files (default: current dir)")
args = parser.parse_args()
# Configure logging
@@ -404,11 +406,12 @@ if __name__ == "__main__":
handlers=[logging.StreamHandler()]
)
data_dir = Path(args.data_dir)
start = time.time()
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties, data_dir=args.data_dir)
if estates:
json_path = Path("byty_bezrealitky.json")
json_path = data_dir / "byty_bezrealitky.json"
json_path.write_text(
json.dumps(estates, ensure_ascii=False, indent=2),
encoding="utf-8",