Add --data-dir CLI argument to replace symlink-based data persistence
All checks were successful
Build and Push / build (push) Successful in 7s
All checks were successful
Build and Push / build (push) Successful in 7s
The Docker entrypoint previously created symlinks from /app/ to /app/data/ so that scripts writing to relative paths would persist their output to the mounted volume. This caused symlink loops in production when stale symlinks leaked into the host data directory.

Instead, all scrapers, merge_and_map.py, regen_map.py, and run_all.sh now accept a --data-dir argument (default: ".") that controls where data files are read from and written to. The entrypoint and crontab pass --data-dir /app/data, eliminating the need for symlinks entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -135,8 +135,8 @@ def load_cache(json_path: str = "byty_realingo.json") -> dict[int, dict]:
|
||||
return {}
|
||||
|
||||
|
||||
def scrape(max_pages: int | None = None, max_properties: int | None = None):
|
||||
cache = load_cache()
|
||||
def scrape(max_pages: int | None = None, max_properties: int | None = None, data_dir: str = "."):
|
||||
cache = load_cache(str(Path(data_dir) / "byty_realingo.json"))
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info("Stahuji inzeráty z Realingo.cz")
|
||||
@@ -354,6 +354,8 @@ if __name__ == "__main__":
|
||||
help="Maximum number of properties to fetch details for")
|
||||
parser.add_argument("--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||
help="Logging level (default: INFO)")
|
||||
parser.add_argument("--data-dir", type=str, default=".",
|
||||
help="Directory for reading/writing data files (default: current dir)")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Configure logging
|
||||
@@ -363,11 +365,12 @@ if __name__ == "__main__":
|
||||
handlers=[logging.StreamHandler()]
|
||||
)
|
||||
|
||||
data_dir = Path(args.data_dir)
|
||||
start = time.time()
|
||||
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties)
|
||||
estates = scrape(max_pages=args.max_pages, max_properties=args.max_properties, data_dir=args.data_dir)
|
||||
|
||||
if estates:
|
||||
json_path = Path("byty_realingo.json")
|
||||
json_path = data_dir / "byty_realingo.json"
|
||||
json_path.write_text(
|
||||
json.dumps(estates, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
|
||||
Reference in New Issue
Block a user