Skip to content

Commit

Permalink
Merge pull request #60 from SainsburyWellcomeCentre/fetch-data-from-gin
Browse files Browse the repository at this point in the history
Host datasets on an external data repository
  • Loading branch information
rodrigcd authored Jul 6, 2023
2 parents 66069af + 2d8b762 commit 1e4a58d
Show file tree
Hide file tree
Showing 9 changed files with 212 additions and 86 deletions.
177 changes: 118 additions & 59 deletions examples/experimental_examples/experimental_data_examples.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion neuralplayground/arenas/hafting_2008.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(
use_behavioral_data: bool
If True, then uses the animal trajectories recorded in Hafting 2008
data_path: str
if None, load the data of corresponding experiment available in the package,
if None, fetch the data from the NeuralPlayground data repository,
else load data from given path
recording_index: int
if None, load data from default recording index of corresponding experiment class
Expand Down
4 changes: 2 additions & 2 deletions neuralplayground/arenas/wernle_2018.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(
use_behavioral_data: bool
If True, then uses the animal trajectories recorded in Wernle 2018
data_path: str
if None, load the data of corresponding experiment available in the package,
if None, fetch the data from the NeuralPlayground data repository,
else load data from given path
recording_index: int
if None, load data from default recording index of corresponding experiment class
Expand Down Expand Up @@ -162,7 +162,7 @@ def __init__(
use_behavioral_data: bool
If True, then uses the animal trajectories recorded in Wernle 2018
data_path: str
if None, load the data of corresponding experiment available in the package,
if None, fetch the data from the NeuralPlayground data repository,
else load data from given path
recording_index: int
if None, load data from default recording index of corresponding experiment class
Expand Down
57 changes: 57 additions & 0 deletions neuralplayground/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Module for fetching and loading datasets.
This module provides functions for fetching and loading data used in tests,
examples, and tutorials. The data are stored in a remote repository on GIN
and are downloaded to the user's local machine the first time they are used.
"""

from pathlib import Path

import pooch

# URL to GIN data repository where the experimental data are hosted
DATA_URL = "https://gin.g-node.org/SainsburyWellcomeCentre/NeuralPlayground/raw/master"

# Data to be downloaded and cached in ~/.NeuralPlayground/data
# NOTE: the directory (and any missing parents) is created as a side effect of
# importing this module; nothing happens if it already exists.
LOCAL_DATA_DIR = Path("~", ".NeuralPlayground", "data").expanduser()
LOCAL_DATA_DIR.mkdir(parents=True, exist_ok=True)

# A pooch data registry object
# Datasets are in the "data" subfolder as zip files - format: {dataset_name}.zip
# Each registry entry maps a zip file name to the sha256 hash of that file
# (presumably what pooch checks the downloaded archive against - confirm in
# the pooch documentation).
DATASET_REGISTRY = pooch.create(
path=LOCAL_DATA_DIR,
base_url=f"{DATA_URL}/data/",
registry={
"hafting_2008.zip": "18934257966c8017e0d86909576468fc7fef5cf5388042b89ffa0833aeb12f04", # noqa: E501
"sargolini_2006.zip": "ca5011e32bb510491e81d2e1d74c45b4ffd1e5c3c5f326237fadd9b2a8867bc3", # noqa: E501
"wernle_2018.zip": "eed1ee8fda8f0ea12e39323db9fecc3e8bb61d3e18aac7dd88ec32d402e5982e", # noqa: E501
},
)

# Names of the available datasets: every registry key with everything from the
# first "." onwards removed (e.g. "hafting_2008.zip" -> "hafting_2008").
dataset_names = [n.split(".")[0] for n in DATASET_REGISTRY.registry.keys()]


def fetch_data_path(
    dataset_name: str,
    progressbar: bool = True,
):
    """Fetch a dataset from the GIN repository and return its local path.

    The dataset's zip archive is requested through the module-level pooch
    registry and unzipped into the local data directory.

    Parameters
    ----------
    dataset_name : str
        The name of one of the available datasets, e.g. "hafting_2008".
    progressbar : bool
        If True, show a progress bar while downloading the data.
        Defaults to True.

    Returns
    -------
    str
        POSIX-style path to the dataset directory, with a trailing "/".

    Raises
    ------
    ValueError
        If ``dataset_name`` is not one of the available datasets.
    """
    if dataset_name not in dataset_names:
        raise ValueError(f"Dataset {dataset_name} not found. Available datasets: {dataset_names}")

    archive_name = f"{dataset_name}.zip"
    unzipper = pooch.Unzip(extract_dir=LOCAL_DATA_DIR)
    DATASET_REGISTRY.fetch(archive_name, processor=unzipper, progressbar=progressbar)

    # The returned path assumes the archive holds a top-level folder named
    # after the dataset, so the extracted data sit in <local data dir>/<name>/.
    dataset_dir = LOCAL_DATA_DIR / dataset_name
    return f"{dataset_dir.as_posix()}/"
19 changes: 11 additions & 8 deletions neuralplayground/experiments/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Experiment

* [1. Introduction](#1-Introduction)
* [2. Experiment Implemented](#2-Experiment-Implemented)
* [3. How to Contribute](#3-How-to-Contribute)
- [Experiment](#experiment)
- [1. Introduction](#1-introduction)
- [2. Experiment-Implemented](#2-experiment-implemented)
- [3. How-to-Contribute](#3-how-to-contribute)

## 1. Introduction

Expand All @@ -29,19 +30,21 @@ One of our goals is to expand this list to add more experiments that are relevan

## 3. How-to-Contribute

1. Create a directory where to download and store the data with name author_data.
1. The experimental data are hosted on a [separate data repository on GIN](https://gin.g-node.org/SainsburyWellcomeCentre/NeuralPlayground). GIN offers an interface almost identical to GitHub. To contribute a new dataset, you need to fork the repository and open a pull request, just like on GitHub. Place your data in a folder named "author_date", zip the folder, and place "author_date.zip" under the "data" directory of the forked repository, for example, "data/smith_2023.zip". If you encounter any problems with this procedure, do not hesitate to contact us.

2. Create a class to read/filter with name author_date_data the data inheriting from the [Experiment class](https://github.com/ClementineDomine/NeuralPlayground/blob/main/neuralplayground/experiments/experiment_core.py),
2. Next, you need to update the `DATASET_REGISTRY` in the [`datasets.py` module](../datasets.py). You will need to add the name of the zip file (e.g. "smith_2023.zip") and the corresponding sha256 hash of the zip file. You can compute the hash using the following command in the terminal: `sha256sum author_date.zip`. The hash is the first string in the output.

3. Create a class named author_date_data to read/filter the data, inheriting from the [Experiment class](https://github.com/ClementineDomine/NeuralPlayground/blob/main/neuralplayground/experiments/experiment_core.py),
which is just an abstract class to share same parent. For a 2D environment, the new data class could inherit from the
base [Hafting2008Data](https://github.com/ClementineDomine/NeuralPlayground/blob/main/neuralplayground/experiments/hafting_2008_data.py)
directly (as [Sargolini2006Data](https://github.com/ClementineDomine/NeuralPlayground/blob/main/neuralplayground/experiments/sargolini_2006_data.py) does),
which has implemented some basic functions, provided that they share a similar data structure.

3. Create or add to [Examples](https://github.com/ClementineDomine/NeuralPlayground/tree/main/examples/experimental_examples/) jupyter notebook for the new experiment.
4. Create or add to [Examples](https://github.com/ClementineDomine/NeuralPlayground/tree/main/examples/experimental_examples/) jupyter notebook for the new experiment.

4. Add unit tests in the [test module](https://github.com/ClementineDomine/NeuralPlayground/tree/main/neuralplayground/tests)
5. Add unit tests in the [test module](https://github.com/ClementineDomine/NeuralPlayground/tree/main/neuralplayground/tests)

5. Cite the data appropriately. Your contribution will be automatically considered by our bot once the pull request has been accepted to the main branch.
6. Cite the data appropriately. Your contribution will be automatically considered by our bot once the pull request has been accepted to the main branch.


All contributions should be submitted through a pull request that we will later assess.
Expand Down
10 changes: 6 additions & 4 deletions neuralplayground/experiments/hafting_2008_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import scipy.io as sio
from IPython.display import display

import neuralplayground
from neuralplayground.datasets import fetch_data_path
from neuralplayground.utils import clean_data, get_2D_ratemap

from .experiment_core import Experiment
Expand All @@ -34,7 +34,8 @@ def __init__(
Parameters
----------
data_path: str
if None, load the data sample in the package, else load data from given path
if None, fetch the data from the NeuralPlayground data repository,
else load data from given path
recording_index: int
if None, load data from default recording index
experiment_name: str
Expand Down Expand Up @@ -65,9 +66,10 @@ def set_animal_data(self, recording_index: int = 0, tolerance: float = 1e-10):
self.head_direction = head_direction

def _find_data_path(self, data_path: str):
"""Set self.data_path to the data directory within the package"""
"""Fetch data from NeuralPlayground data repository
if no data path is supplied by the user"""
if data_path is None:
self.data_path = os.path.join(neuralplayground.__path__[0], "experiments/hafting_2008/")
self.data_path = fetch_data_path("hafting_2008")
else:
self.data_path = data_path

Expand Down
19 changes: 10 additions & 9 deletions neuralplayground/experiments/sargolini_2006_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import scipy.io as sio

import neuralplayground
from neuralplayground.datasets import fetch_data_path
from neuralplayground.experiments import Experiment, Hafting2008Data
from neuralplayground.utils import clean_data

Expand All @@ -28,12 +29,13 @@ def __init__(
experiment_name: str
string to identify object in case of multiple instances
data_path: str
if None, load the data sample in the package, else load data from given path
if None, fetch the data from the NeuralPlayground data repository,
else load data from given path
"""
self.experiment_name = experiment_name
if data_path is None:
# Set data_path to the data directory within the package
self.data_path = os.path.join(neuralplayground.__path__[0], "experiments/sargolini_2006")
self.data_path = fetch_data_path("sargolini_2006")
else:
self.data_path = data_path
# Sort the data in data_path
Expand Down Expand Up @@ -105,7 +107,8 @@ def __init__(
Parameters
----------
data_path: str
if None, load the data sample in the package, else load data from given path
if None, fetch the data from the NeuralPlayground data repository,
else load data from given path
recording_index: int
if None, load data from default recording index
experiment_name: str
Expand All @@ -121,14 +124,12 @@ def __init__(
)

def _find_data_path(self, data_path: str):
"""Set self.data_path to the data directory within the package"""
"""Fetch data from NeuralPlayground data repository
if no data path is supplied by the user"""
if data_path is None:
self.data_path = os.path.join(
neuralplayground.__path__[0],
"experiments/sargolini_2006/raw_data_sample/",
)
self.data_path = fetch_data_path("sargolini_2006") + "raw_data_sample/"
else:
self.data_path = data_path
self.data_path = data_path + "raw_data_sample/"

def _load_data(self):
"""Parse data according to specific data format
Expand Down
9 changes: 6 additions & 3 deletions neuralplayground/experiments/wernle_2018_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import scipy.io as sio

import neuralplayground
from neuralplayground.datasets import fetch_data_path
from neuralplayground.experiments.hafting_2008_data import Hafting2008Data
from neuralplayground.utils import get_2D_ratemap

Expand All @@ -30,7 +31,8 @@ def __init__(
Parameters
----------
data_path: str
if None, load the data sample in the package, else load data from given path
if None, fetch the data from the NeuralPlayground data repository,
else load data from given path
recording_index: int
if None, load data from default recording index
experiment_name: str
Expand All @@ -46,9 +48,10 @@ def __init__(
)

def _find_data_path(self, data_path: str):
"""Set self.data_path to the data directory within the package"""
"""Fetch data from NeuralPlayground data repository
if no data path is supplied by the user"""
if data_path is None:
self.data_path = os.path.join(neuralplayground.__path__[0], "experiments/wernle_2018/")
self.data_path = fetch_data_path("wernle_2018")
else:
self.data_path = data_path

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies = [
"deepdiff",
"opencv-python",
"gymnasium",
"pooch",
]

[project.urls]
Expand Down

0 comments on commit 1e4a58d

Please sign in to comment.