From 42a7eb6172f1df06571960173c841c2bebec6260 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:34:05 +0000 Subject: [PATCH] Fix pooch retrieval of file registry (#260) * Fix pooch retrieval of file registry We need to specify a filename in `pooch.retrieve` for the file to be correctly overwritten every time. If filename is None, it is set to a combination of the MD5 hash of the URL and the last part of the URL. So if the URL stays the same, the file won't be updated, even if the contents are changed. Signed-off-by: sfmig <33267254+sfmig@users.noreply.github.com> * Fix precommits and add wheel as dependency to check-manifest (py3.12) * Force file registry to download every time --------- Signed-off-by: sfmig <33267254+sfmig@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- tests/fixtures/integration.py | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ed19f3ed..157827fc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,7 +50,7 @@ repos: hooks: - id: check-manifest args: [--no-build-isolation] - additional_dependencies: [setuptools-scm] + additional_dependencies: [setuptools-scm, wheel] # - repo: https://github.com/codespell-project/codespell # # Configuration for codespell is in pyproject.toml # rev: v2.3.0 diff --git a/tests/fixtures/integration.py b/tests/fixtures/integration.py index 393928c3..98439557 100644 --- a/tests/fixtures/integration.py +++ b/tests/fixtures/integration.py @@ -21,18 +21,28 @@ def pooch_registry() -> dict: URL and hash of the GIN repository with the test data """ + # Cache the test data in the user's home directory + test_data_dir = Path.home() / ".crabs-exploration-test-data" + + # Remove the file registry if it exists + # otherwise the registry is not downloaded everytime + file_registry_path = test_data_dir / "files-registry.txt" + if file_registry_path.is_file(): + Path(file_registry_path).unlink() + # Initialise pooch
registry registry = pooch.create( - Path.home() / ".crabs-exploration-test-data", + test_data_dir, base_url=f"{GIN_TEST_DATA_REPO}/raw/master/test_data", ) # Download only the registry file from GIN - # if known_hash = None, the file is always downloaded. + # (this file should always be downloaded fresh from GIN) file_registry = pooch.retrieve( url=f"{GIN_TEST_DATA_REPO}/raw/master/files-registry.txt", known_hash=None, - path=Path.home() / ".crabs-exploration-test-data", + fname=file_registry_path.name, + path=file_registry_path.parent, ) # Load registry file onto pooch registry