Merge branch 'release/2.16'
mayofaulkner committed Sep 27, 2022
2 parents 203ccff + 236ae29 commit 9c91903
Showing 8 changed files with 115 additions and 32 deletions.
8 changes: 5 additions & 3 deletions brainbox/task/passive.py
@@ -202,12 +202,14 @@ def get_stim_aligned_activity(stim_events, spike_times, spike_depths, z_score_fl

# Get rid of any nan values
stim_times = stim_times[~np.isnan(stim_times)]
- stim_intervals = np.c_[stim_times - pre_stim, stim_times + post_stim]
- base_intervals = np.c_[stim_times - base_stim, stim_times - pre_stim]
- out_intervals = stim_intervals[:, 1] > times[-1]
+ stim_intervals = stim_times - pre_stim
+ base_intervals = stim_times - base_stim
+ out_intervals = stim_intervals > times[-1]

idx_stim = np.searchsorted(times, stim_intervals, side='right')[np.invert(out_intervals)]
idx_base = np.searchsorted(times, base_intervals, side='right')[np.invert(out_intervals)]
+ idx_stim = np.c_[idx_stim, idx_stim + n_bins]
+ idx_base = np.c_[idx_base, idx_base + n_bins_base]

stim_trials = np.zeros((depths.shape[0], n_bins, idx_stim.shape[0]))
noise_trials = np.zeros((depths.shape[0], n_bins_base, idx_stim.shape[0]))
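The new scheme locates each stimulus window with a single np.searchsorted call on its start time and then extends it by a fixed bin count, instead of searching both edges of an explicit (start, stop) interval. A minimal sketch with toy values (every name below is illustrative, not taken from the diff):

import numpy as np

# time axis of the binned activity and stimulus onsets (toy values)
times = np.arange(0, 10, 0.1)
stim_times = np.array([2.0, 5.5, 10.05])
pre_stim, post_stim, bin_size = 0.1, 0.4, 0.1
n_bins = int(np.round((pre_stim + post_stim) / bin_size))

# locate each window once by its start, then extend by a fixed bin count
starts = stim_times - pre_stim
out = starts > times[-1]  # windows starting beyond the end of the recording
idx = np.searchsorted(times, starts[~out], side='right')
windows = np.c_[idx, idx + n_bins]  # each row is [first_bin, first_bin + n_bins)
print(windows.shape)  # (2, 2): the third stimulus is dropped
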
2 changes: 1 addition & 1 deletion ibllib/__init__.py
@@ -1,5 +1,5 @@
"""Library implementing the International Brain Laboratory data pipeline."""
__version__ = "2.15.3"
__version__ = "2.16.0"
import warnings

from iblutil.util import get_logger
27 changes: 5 additions & 22 deletions ibllib/atlas/flatmaps.py
@@ -3,7 +3,6 @@
"""
from functools import lru_cache

- import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
@@ -152,6 +151,7 @@ def plot_swanson(acronyms=None, values=None, ax=None, hemisphere=None, br=None,
"""
mapping = 'Swanson'
br = BrainRegions() if br is None else br
+ br.compute_hierarchy()
s2a = swanson()
# both hemispheres
if hemisphere == 'both':
@@ -169,25 +169,7 @@ def plot_swanson(acronyms=None, values=None, ax=None, hemisphere=None, br=None,
im = br.rgba[regions]
iswan = None
else:
- user_aids = br.parse_acronyms_argument(acronyms)
- # if the user provided inputs are higher level than swanson propagate down
- swaids = br.id[np.unique(s2a)]
- maids = np.setdiff1d(user_aids, swaids)  # those are the indices not in Swanson
- for i, maid in enumerate(maids):
-     if maid <= 1:
-         continue
-     childs_in_sw = np.intersect1d(br.descendants(maid)['id'][1:], swaids)
-     if childs_in_sw.size > 0:
-         user_aids = np.r_[user_aids, childs_in_sw]
-         values = np.r_[values, values[i] + childs_in_sw * 0]
- # the user may have input non-unique regions
- df = pd.DataFrame(dict(aid=user_aids, value=values)).groupby('aid').mean()
- aids, vals = (df.index.values, df['value'].values)
- # apply mapping and perform another round of aggregation
- _, _, ibr = np.intersect1d(aids, br.id, return_indices=True)
- ibr = br.mappings['Swanson-lr'][ibr]
- df = pd.DataFrame(dict(ibr=ibr, value=vals)).groupby('ibr').mean()
- ibr, vals = (df.index.values, df['value'].values)
+ ibr, vals = br.propagate_down(acronyms, values)
# we now have the mapped regions and aggregated values, map values onto swanson map
iswan, iv = ismember(s2a, ibr)
im = np.zeros_like(s2a, dtype=np.float32)
@@ -211,8 +193,9 @@ def plot_swanson(acronyms=None, values=None, ax=None, hemisphere=None, br=None,

# provides the means to see the region on the axes
def format_coord(x, y):
- acronym = br.acronym[s2a[int(y), int(x)]]
- return f'x={x:1.4f}, y={x:1.4f}, {acronym}'
+ ind = s2a[int(y), int(x)]
+ ancestors = br.ancestors(br.id[ind])['acronym']
+ return f'x={x:1.4f}, y={y:1.4f}, {br.acronym[ind]} \n {ancestors}'

ax.format_coord = format_coord
return ax
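A hedged usage sketch of the refactored plot_swanson: user inputs above the Swanson leaf regions are now propagated down through BrainRegions.propagate_down rather than the inline pandas aggregation removed above. Import paths and region choices are assumptions for illustration:

import numpy as np
import matplotlib.pyplot as plt
from ibllib.atlas.flatmaps import plot_swanson
from ibllib.atlas.regions import BrainRegions

br = BrainRegions()
# one value per input region; 'CB' (cerebellum) sits above the Swanson
# leaf regions, so its value is filled down onto the cerebellar children
ax = plot_swanson(acronyms=np.array(['CB', 'MOp']), values=np.array([1.0, 2.0]), br=br)
plt.show()
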
49 changes: 48 additions & 1 deletion ibllib/atlas/regions.py
@@ -123,6 +123,26 @@ def leaves(self):
leaves = np.setxor1d(self.id, self.parent)
return self.get(np.int64(leaves[~np.isnan(leaves)]))

+ def propagate_down(self, acronyms, values):
+     """
+     Remaps a set of user-specified acronyms and values onto the Swanson map,
+     filling in child nodes when values are provided higher up the hierarchy.
+     :param acronyms: list or array of Allen ids or acronyms
+     :param values: list or array of associated values
+     :return: (indices, values) after propagation and aggregation
+     """
+     user_aids = self.parse_acronyms_argument(acronyms)
+     _, user_indices = ismember(user_aids, self.id)
+     self.compute_hierarchy()
+     ia, ib = ismember(self.hierarchy, user_indices)
+     v = np.zeros_like(ia, dtype=np.float64) * np.NaN
+     v[ia] = values[ib]
+     all_values = np.nanmedian(v, axis=0)
+     indices = np.where(np.any(ia, axis=0))[0]
+     all_values = all_values[indices]
+     return indices, all_values
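
A short usage sketch, mirroring the new unit test in ibllib/tests/test_atlas.py; the acronyms are example inputs and values must be an array for the fancy indexing above to work:

import numpy as np
from ibllib.atlas.regions import BrainRegions

br = BrainRegions()
# 'CB' fills its value down onto all cerebellar descendants; regions that
# also sit under 'ANcr2' aggregate both inputs via the nanmedian
ibr, vals = br.propagate_down(['CB', 'ANcr2'], np.array([0.0, 1.0]))
print(br.acronym[ibr][:5], vals[:5])  # region indices and propagated values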

def _mapping_from_regions_list(self, new_map, lateralize=False):
"""
From a vector of regions id, creates a mapping such as
@@ -429,7 +449,7 @@ def _compute_mappings(self):
"""
Recomputes the mapping indices for all mappings
This is left mainly as a reference for adding future mappings, as this takes a few seconds
- to execute. In production,we use the MAPPING_FILES pqt to avoid recompuing at each \
+ to execute. In production, we use the MAPPING_FILES pqt to avoid recomputing at each \
instantiation
"""
beryl = np.load(Path(__file__).parent.joinpath('beryl.npy'))
@@ -447,6 +467,33 @@ def _compute_mappings(self):
}
pd.DataFrame(self.mappings).to_parquet(FILE_MAPPINGS)

+ def compute_hierarchy(self):
+     """
+     Creates a self.hierarchy attribute, an n_levels by n_regions array of
+     indices. This is useful to perform fast vectorized computations of
+     ancestors and descendants.
+     :return:
+     """
+     if hasattr(self, 'hierarchy'):
+         return
+     n_levels = np.max(self.level)
+     n_regions = self.id.size
+     # creates the parent index. Void and root are omitted from the
+     # intersection as they appear as NaN
+     pmask, i_p = ismember(self.parent, self.id)
+     self.iparent = np.arange(n_regions)
+     self.iparent[pmask] = i_p
+     # the last level of the hierarchy is the actual mapping, then going up
+     # level per level we assign the parent index
+     self.hierarchy = np.tile(np.arange(n_regions), (n_levels, 1))
+     _mask = np.zeros(n_regions, bool)
+     for lev in np.flipud(np.arange(n_levels)):
+         if lev < (n_levels - 1):
+             self.hierarchy[lev, _mask] = self.iparent[self.hierarchy[lev + 1, _mask]]
+         sel = self.level == (lev + 1)
+         self.hierarchy[lev, sel] = np.where(sel)[0]
+         _mask[sel] = True

def remap(self, region_ids, source_map='Allen', target_map='Beryl'):
"""
Remap atlas regions ids from source map to target map
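A sketch of what the hierarchy table holds, using the column checked by the new unit test below: column j stacks the indices of region j's ancestors from the top level down, padded with j itself from the region's own level onward, so ancestor lookups vectorize over whole columns:

from ibllib.atlas.regions import BrainRegions

br = BrainRegions()
br.compute_hierarchy()
print(br.hierarchy.shape)  # (n_levels, n_regions)
# ancestors of the region at index 349, root-most first (cf. test_compute_hierarchy)
print(br.acronym[br.hierarchy[:, 349]])
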
2 changes: 1 addition & 1 deletion ibllib/oneibl/patcher.py
@@ -111,7 +111,7 @@ def register_dataset(self, file_list, **kwargs):
:param dry: (bool) False by default
:return:
"""
- return register_dataset(file_list, one=self.one, server_only=True, **kwargs)
+ return register_dataset(file_list, one=self.one, server_only=True, exists=True, **kwargs)

def register_datasets(self, file_list, **kwargs):
"""
29 changes: 26 additions & 3 deletions ibllib/oneibl/registration.py
@@ -46,7 +46,7 @@ def _check_filename_for_registration(full_file, patterns):


def register_dataset(file_list, one=None, created_by=None, repository=None, server_only=False,
- versions=None, default=True, dry=False, max_md5_size=None):
+ versions=None, default=True, dry=False, max_md5_size=None, exists=False):
"""
Registers a set of files belonging to a session only on the server
:param file_list: (list of pathlib.Path or pathlib.Path)
@@ -99,7 +99,8 @@ def register_dataset(file_list, one=None, created_by=None, repository=None, serv
'hashes': hashes,
'filesizes': [p.stat().st_size for p in file_list],
'versions': versions,
- 'default': default}
+ 'default': default,
+ 'exists': exists}
if not dry:
if one is None:
one = ONE(cache_rest=None)
Expand Down Expand Up @@ -142,7 +143,9 @@ def register_session_raw_data(session_path, one=None, overwrite=False, dry=False
# filter 2/2 unless overwrite is True, filter out the datasets that already exist
if not overwrite:
files_2_register = list(filter(lambda f: f not in already_registered, files_2_register))
- response = register_dataset(files_2_register, one=one, versions=None, dry=dry, **kwargs)
+
+ data_repo = get_local_data_repository(one)
+ response = register_dataset(files_2_register, one=one, versions=None, dry=dry, repository=data_repo, **kwargs)
return files_2_register, response


Expand Down Expand Up @@ -493,3 +496,23 @@ def _glob_session(ses_path):
for gp in REGISTRATION_GLOB_PATTERNS:
fl.extend(list(ses_path.glob(gp)))
return fl


+ def get_local_data_repository(one):
+     """
+     Get the local data repository name from the Globus client id
+     :param one:
+     :return:
+     """
+     if one is None:
+         return
+
+     if not Path.home().joinpath(".globusonline/lta/client-id.txt").exists():
+         return
+
+     with open(Path.home().joinpath(".globusonline/lta/client-id.txt"), 'r') as fid:
+         globus_id = fid.read()
+
+     data_repo = one.alyx.rest('data-repository', 'list', globus_endpoint_id=globus_id)
+     if len(data_repo):
+         return [da['name'] for da in data_repo][0]
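
A hedged usage sketch: the helper resolves the machine's Globus endpoint id to an Alyx data repository name so that raw data registered by register_session_raw_data is tagged with the local repository. The ONE connection below is an assumption:

from one.api import ONE
from ibllib.oneibl.registration import get_local_data_repository

one = ONE()
# returns None when no Globus client id is configured on this machine
repo_name = get_local_data_repository(one)
print(repo_name)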
20 changes: 20 additions & 0 deletions ibllib/tests/test_atlas.py
@@ -284,6 +284,26 @@ def test_argument_parser(self):
self.brs.parse_acronyms_argument(acronyms + ['toto'])
assert np.all(self.brs.parse_acronyms_argument(acronyms + ['toto'], mode='clip') == ids)

+ def test_compute_hierarchy(self):
+     self.brs.compute_hierarchy()
+     np.testing.assert_equal(
+         self.brs.hierarchy[:, 349], np.array([2, 3, 4, 5, 6, 340, 347, 349, 349, 349]))
+
+ def test_propagate_down(self):
+     acronyms = ['CB', 'ANcr2']
+     ind0 = ismember(self.brs.acronym2id(acronyms), self.brs.id)[1]
+     ind1 = [7, 45, 66]
+     for ind in [ind0, ind1]:
+         with self.subTest(indices=ind):
+             acronyms = self.brs.acronym[ind]
+             values = np.arange(len(acronyms))
+             ibr, ivals = self.brs.propagate_down(acronyms, values)
+             idesc = []
+             for c, i in enumerate(ind):
+                 ii = ismember(self.brs.descendants(self.brs.id[i])['id'], self.brs.id)[1]
+                 idesc.append(ii)
+             np.testing.assert_equal(ibr, np.unique(np.concatenate(idesc)))


class TestAtlasPlots(unittest.TestCase):

10 changes: 9 additions & 1 deletion release_notes.md
@@ -1,11 +1,19 @@
+ ## Release 2.16
+ ### Release Notes 2.16.0 - 2022-09-27
+ ### features
+ - swanson flatmap: the algorithm to propagate down the hierarchy has been refined
+
+ ### bugfixes
+ - set exists flag to false for all data repos when registering datasets with tasks
+
## Release Notes 2.15
### Release Notes 2.15.3 - 2022-09-26
- SessionLoader error handling and bug fix

### Release Notes 2.15.2 - 2022-09-22
- extraction pipeline: fix unpacking of empty arguments field from alyx dict that prevents running task

- ### Release Notes 2.15.1
+ ### Release Notes 2.15.1 - 2022-09-21
- atlas: gene-expression backend and MRI Toronto atlas stretch and squeeze factors (Dan/Olivier)
- optional FDR correction (Benjamini-Hochberg) for multiple testing (Guido)
- SpikeSortingLoader can be used with ONE local mode (Julia)
