diff --git a/brainbox/io/one.py b/brainbox/io/one.py index c9b25a778..ee55ea9d0 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -866,13 +866,21 @@ def _get_attributes(dataset_types): waveform_attributes = list(set(WAVEFORMS_ATTRIBUTES + waveform_attributes)) return {'spikes': spike_attributes, 'clusters': cluster_attributes, 'waveforms': waveform_attributes} - def _get_spike_sorting_collection(self, spike_sorter='pykilosort'): + def _get_spike_sorting_collection(self, spike_sorter=None): """ Filters a list or array of collections to get the relevant spike sorting dataset if there is a pykilosort, load it """ - collection = next(filter(lambda c: c == f'alf/{self.pname}/{spike_sorter}', self.collections), None) - # otherwise, prefers the shortest + for sorter in list([spike_sorter, 'iblsorter', 'pykilosort']): + if sorter is None: + continue + if sorter == "": + collection = next(filter(lambda c: c == f'alf/{self.pname}', self.collections), None) + else: + collection = next(filter(lambda c: c == f'alf/{self.pname}/{sorter}', self.collections), None) + if collection is not None: + return collection + # if none is found amongst the defaults, prefers the shortest collection = collection or next(iter(sorted(filter(lambda c: f'alf/{self.pname}' in c, self.collections), key=len)), None) _logger.debug(f"selecting: {collection} to load amongst candidates: {self.collections}") return collection @@ -982,14 +990,13 @@ def download_raw_waveforms(self, **kwargs): """ _logger.debug(f"loading waveforms from {self.collection}") return self.one.load_object( - self.eid, "waveforms", - attribute=["traces", "templates", "table", "channels"], + id=self.eid, obj="waveforms", attribute=["traces", "templates", "table", "channels"], collection=self._get_spike_sorting_collection("pykilosort"), download_only=True, **kwargs ) def raw_waveforms(self, **kwargs): wf_paths = self.download_raw_waveforms(**kwargs) - return WaveformsLoader(wf_paths[0].parent, wfs_dtype=np.float16) + return WaveformsLoader(wf_paths[0].parent) def load_channels(self, **kwargs): """ @@ -1022,7 +1029,7 @@ def load_channels(self, **kwargs): self.histology = 'alf' return Bunch(channels) - def load_spike_sorting(self, spike_sorter='pykilosort', revision=None, enforce_version=True, good_units=False, **kwargs): + def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_version=False, good_units=False, **kwargs): """ Loads spikes, clusters and channels diff --git a/brainbox/io/spikeglx.py b/brainbox/io/spikeglx.py index 9c0618c11..fff72c5f2 100644 --- a/brainbox/io/spikeglx.py +++ b/brainbox/io/spikeglx.py @@ -128,6 +128,7 @@ def __init__(self, pid, one, typ='ap', cache_folder=None, remove_cached=False): self.file_chunks = self.one.load_dataset(self.eid, f'*.{typ}.ch', collection=f"*{self.pname}") meta_file = self.one.load_dataset(self.eid, f'*.{typ}.meta', collection=f"*{self.pname}") cbin_rec = self.one.list_datasets(self.eid, collection=f"*{self.pname}", filename=f'*{typ}.*bin', details=True) + cbin_rec.index = cbin_rec.index.map(lambda x: (self.eid, x)) self.url_cbin = self.one.record2url(cbin_rec)[0] with open(self.file_chunks, 'r') as f: self.chunks = json.load(f) diff --git a/ibllib/__init__.py b/ibllib/__init__.py index efa1151d9..9da654573 100644 --- a/ibllib/__init__.py +++ b/ibllib/__init__.py @@ -2,7 +2,7 @@ import logging import warnings -__version__ = '2.39.1' +__version__ = '2.40.0' warnings.filterwarnings('always', category=DeprecationWarning, module='ibllib') # if this becomes a full-blown library we 
should let the logging configuration to the discretion of the dev diff --git a/ibllib/ephys/sync_probes.py b/ibllib/ephys/sync_probes.py index 3f3411479..54106b245 100644 --- a/ibllib/ephys/sync_probes.py +++ b/ibllib/ephys/sync_probes.py @@ -47,7 +47,7 @@ def sync(ses_path, **kwargs): return version3B(ses_path, **kwargs) -def version3A(ses_path, display=True, type='smooth', tol=2.1): +def version3A(ses_path, display=True, type='smooth', tol=2.1, probe_names=None): """ From a session path with _spikeglx_sync arrays extracted, locate ephys files for 3A and outputs one sync.timestamps.probeN.npy file per acquired probe. By convention the reference diff --git a/ibllib/io/extractors/biased_trials.py b/ibllib/io/extractors/biased_trials.py index 06baa5d03..1226d2611 100644 --- a/ibllib/io/extractors/biased_trials.py +++ b/ibllib/io/extractors/biased_trials.py @@ -97,7 +97,7 @@ class TrialsTableBiased(BaseBpodTrialsExtractor): save_names = ('_ibl_trials.table.pqt', None, None, '_ibl_wheel.timestamps.npy', '_ibl_wheel.position.npy', '_ibl_wheelMoves.intervals.npy', '_ibl_wheelMoves.peakAmplitude.npy', None, None) var_names = ('table', 'stimOff_times', 'stimFreeze_times', 'wheel_timestamps', 'wheel_position', 'wheelMoves_intervals', - 'wheelMoves_peakAmplitude', 'peakVelocity_times', 'is_final_movement') + 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times', 'is_final_movement') def _extract(self, extractor_classes=None, **kwargs): extractor_classes = extractor_classes or [] @@ -125,7 +125,7 @@ class TrialsTableEphys(BaseBpodTrialsExtractor): '_ibl_wheelMoves.intervals.npy', '_ibl_wheelMoves.peakAmplitude.npy', None, None, None, None, '_ibl_trials.quiescencePeriod.npy') var_names = ('table', 'stimOff_times', 'stimFreeze_times', 'wheel_timestamps', 'wheel_position', 'wheelMoves_intervals', - 'wheelMoves_peakAmplitude', 'peakVelocity_times', 'is_final_movement', + 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times', 'is_final_movement', 'phase', 'position', 'quiescence') def _extract(self, extractor_classes=None, **kwargs): @@ -152,12 +152,12 @@ class BiasedTrials(BaseBpodTrialsExtractor): save_names = ('_ibl_trials.goCueTrigger_times.npy', '_ibl_trials.stimOnTrigger_times.npy', None, '_ibl_trials.stimOffTrigger_times.npy', None, None, '_ibl_trials.table.pqt', '_ibl_trials.stimOff_times.npy', None, '_ibl_wheel.timestamps.npy', '_ibl_wheel.position.npy', - '_ibl_wheelMoves.intervals.npy', '_ibl_wheelMoves.peakAmplitude.npy', None, None, '_ibl_trials.included.npy', - None, None, '_ibl_trials.quiescencePeriod.npy') + '_ibl_wheelMoves.intervals.npy', '_ibl_wheelMoves.peakAmplitude.npy', None, None, + '_ibl_trials.included.npy', None, None, '_ibl_trials.quiescencePeriod.npy') var_names = ('goCueTrigger_times', 'stimOnTrigger_times', 'itiIn_times', 'stimOffTrigger_times', 'stimFreezeTrigger_times', 'errorCueTrigger_times', 'table', 'stimOff_times', 'stimFreeze_times', 'wheel_timestamps', 'wheel_position', - 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', 'peakVelocity_times', 'is_final_movement', 'included', - 'phase', 'position', 'quiescence') + 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times', 'is_final_movement', + 'included', 'phase', 'position', 'quiescence') def _extract(self, extractor_classes=None, **kwargs) -> dict: extractor_classes = extractor_classes or [] @@ -182,8 +182,8 @@ class EphysTrials(BaseBpodTrialsExtractor): '_ibl_trials.included.npy', None, None, '_ibl_trials.quiescencePeriod.npy') var_names = ('goCueTrigger_times', 
'stimOnTrigger_times', 'itiIn_times', 'stimOffTrigger_times', 'stimFreezeTrigger_times', 'errorCueTrigger_times', 'table', 'stimOff_times', 'stimFreeze_times', 'wheel_timestamps', 'wheel_position', - 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', 'peakVelocity_times', 'is_final_movement', 'included', - 'phase', 'position', 'quiescence') + 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times', 'is_final_movement', + 'included', 'phase', 'position', 'quiescence') def _extract(self, extractor_classes=None, **kwargs) -> dict: extractor_classes = extractor_classes or [] diff --git a/ibllib/io/extractors/bpod_trials.py b/ibllib/io/extractors/bpod_trials.py index 00f21eb06..3089b52cf 100644 --- a/ibllib/io/extractors/bpod_trials.py +++ b/ibllib/io/extractors/bpod_trials.py @@ -3,17 +3,14 @@ This module will extract the Bpod trials and wheel data based on the task protocol, i.e. habituation, training or biased. """ -import logging import importlib -from ibllib.io.extractors.base import get_bpod_extractor_class, protocol2extractor +from ibllib.io.extractors.base import get_bpod_extractor_class, protocol2extractor, BaseExtractor from ibllib.io.extractors.habituation_trials import HabituationTrials from ibllib.io.extractors.training_trials import TrainingTrials from ibllib.io.extractors.biased_trials import BiasedTrials, EphysTrials from ibllib.io.extractors.base import BaseBpodTrialsExtractor -_logger = logging.getLogger(__name__) - def get_bpod_extractor(session_path, protocol=None, task_collection='raw_behavior_data') -> BaseBpodTrialsExtractor: """ @@ -39,20 +36,25 @@ def get_bpod_extractor(session_path, protocol=None, task_collection='raw_behavio 'BiasedTrials': BiasedTrials, 'EphysTrials': EphysTrials } + if protocol: - class_name = protocol2extractor(protocol) + extractor_class_name = protocol2extractor(protocol) else: - class_name = get_bpod_extractor_class(session_path, task_collection=task_collection) - if class_name in builtins: - return builtins[class_name](session_path) + extractor_class_name = get_bpod_extractor_class(session_path, task_collection=task_collection) + if extractor_class_name in builtins: + return builtins[extractor_class_name](session_path) # look if there are custom extractor types in the personal projects repo - if not class_name.startswith('projects.'): - class_name = 'projects.' + class_name - module, class_name = class_name.rsplit('.', 1) + if not extractor_class_name.startswith('projects.'): + extractor_class_name = 'projects.' 
+ extractor_class_name + module, extractor_class_name = extractor_class_name.rsplit('.', 1) mdl = importlib.import_module(module) - extractor_class = getattr(mdl, class_name, None) + extractor_class = getattr(mdl, extractor_class_name, None) if extractor_class: - return extractor_class(session_path) + my_extractor = extractor_class(session_path) + if not isinstance(my_extractor, BaseExtractor): + raise ValueError( + f"{my_extractor} should be an Extractor class inheriting from ibllib.io.extractors.base.BaseExtractor") + return my_extractor else: - raise ValueError(f'extractor {class_name} not found') + raise ValueError(f'extractor {extractor_class_name} not found') diff --git a/ibllib/io/extractors/ephys_fpga.py b/ibllib/io/extractors/ephys_fpga.py index e7ae93220..c941ce4c8 100644 --- a/ibllib/io/extractors/ephys_fpga.py +++ b/ibllib/io/extractors/ephys_fpga.py @@ -585,13 +585,13 @@ class FpgaTrials(extractors_base.BaseExtractor): '_ibl_trials.stimOff_times.npy', None, None, None, '_ibl_trials.quiescencePeriod.npy', '_ibl_trials.table.pqt', '_ibl_wheel.timestamps.npy', '_ibl_wheel.position.npy', '_ibl_wheelMoves.intervals.npy', - '_ibl_wheelMoves.peakAmplitude.npy') + '_ibl_wheelMoves.peakAmplitude.npy', None) var_names = ('goCueTrigger_times', 'stimOnTrigger_times', 'stimOffTrigger_times', 'stimFreezeTrigger_times', 'errorCueTrigger_times', 'errorCue_times', 'itiIn_times', 'stimFreeze_times', 'stimOff_times', 'valveOpen_times', 'phase', 'position', 'quiescence', 'table', 'wheel_timestamps', 'wheel_position', - 'wheelMoves_intervals', 'wheelMoves_peakAmplitude') + 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times') bpod_rsync_fields = ('intervals', 'response_times', 'goCueTrigger_times', 'stimOnTrigger_times', 'stimOffTrigger_times', diff --git a/ibllib/io/extractors/fibrephotometry.py b/ibllib/io/extractors/fibrephotometry.py index d11a5856e..0c7e97c07 100644 --- a/ibllib/io/extractors/fibrephotometry.py +++ b/ibllib/io/extractors/fibrephotometry.py @@ -225,7 +225,7 @@ def _extract(self, light_source_map=None, collection=None, regions=None, **kwarg regions = regions or [k for k in fp_data['raw'].keys() if 'Region' in k] out_df = fp_data['raw'].filter(items=regions, axis=1).sort_index(axis=1) out_df['times'] = ts - out_df['wavelength'] = np.NaN + out_df['wavelength'] = np.nan out_df['name'] = '' out_df['color'] = '' # Extract channel index diff --git a/ibllib/io/extractors/mesoscope.py b/ibllib/io/extractors/mesoscope.py index a9162b0c5..a3a4a7d58 100644 --- a/ibllib/io/extractors/mesoscope.py +++ b/ibllib/io/extractors/mesoscope.py @@ -4,8 +4,8 @@ import numpy as np from scipy.signal import find_peaks import one.alf.io as alfio -from one.util import ensure_list from one.alf.files import session_path_parts +from iblutil.util import ensure_list import matplotlib.pyplot as plt from packaging import version diff --git a/ibllib/io/extractors/opto_trials.py b/ibllib/io/extractors/opto_trials.py index 9b720259e..b3a931c0d 100644 --- a/ibllib/io/extractors/opto_trials.py +++ b/ibllib/io/extractors/opto_trials.py @@ -16,8 +16,8 @@ class LaserBool(BaseBpodTrialsExtractor): def _extract(self, **kwargs): _logger.info('Extracting laser datasets') # reference pybpod implementation - lstim = np.array([float(t.get('laser_stimulation', np.NaN)) for t in self.bpod_trials]) - lprob = np.array([float(t.get('laser_probability', np.NaN)) for t in self.bpod_trials]) + lstim = np.array([float(t.get('laser_stimulation', np.nan)) for t in self.bpod_trials]) + lprob = 
np.array([float(t.get('laser_probability', np.nan)) for t in self.bpod_trials]) # Karolina's choice world legacy implementation - from Slack message: # it is possible that some versions I have used: @@ -30,9 +30,9 @@ def _extract(self, **kwargs): # laserOFF_trials=(optoOUT ==0); if 'PROBABILITY_OPTO' in self.settings.keys() and np.all(np.isnan(lstim)): lprob = np.zeros_like(lprob) + self.settings['PROBABILITY_OPTO'] - lstim = np.array([float(t.get('opto_ON_time', np.NaN)) for t in self.bpod_trials]) + lstim = np.array([float(t.get('opto_ON_time', np.nan)) for t in self.bpod_trials]) if np.all(np.isnan(lstim)): - lstim = np.array([float(t.get('optoOUT', np.NaN)) for t in self.bpod_trials]) + lstim = np.array([float(t.get('optoOUT', np.nan)) for t in self.bpod_trials]) lstim[lstim == 255] = 1 else: lstim[~np.isnan(lstim)] = 1 diff --git a/ibllib/io/extractors/training_trials.py b/ibllib/io/extractors/training_trials.py index 45195556e..6b787340d 100644 --- a/ibllib/io/extractors/training_trials.py +++ b/ibllib/io/extractors/training_trials.py @@ -32,7 +32,7 @@ def _extract(self): feedbackType = np.zeros(len(self.bpod_trials), np.int64) for i, t in enumerate(self.bpod_trials): state_names = ['correct', 'error', 'no_go', 'omit_correct', 'omit_error', 'omit_no_go'] - outcome = {sn: ~np.isnan(t['behavior_data']['States timestamps'].get(sn, [[np.NaN]])[0][0]) for sn in state_names} + outcome = {sn: ~np.isnan(t['behavior_data']['States timestamps'].get(sn, [[np.nan]])[0][0]) for sn in state_names} assert np.sum(list(outcome.values())) == 1 outcome = next(k for k in outcome if outcome[k]) if outcome == 'correct': @@ -709,7 +709,7 @@ class TrialsTable(BaseBpodTrialsExtractor): save_names = ('_ibl_trials.table.pqt', None, None, '_ibl_wheel.timestamps.npy', '_ibl_wheel.position.npy', '_ibl_wheelMoves.intervals.npy', '_ibl_wheelMoves.peakAmplitude.npy', None, None) var_names = ('table', 'stimOff_times', 'stimFreeze_times', 'wheel_timestamps', 'wheel_position', 'wheelMoves_intervals', - 'wheelMoves_peakAmplitude', 'peakVelocity_times', 'is_final_movement') + 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times', 'is_final_movement') def _extract(self, extractor_classes=None, **kwargs): base = [Intervals, GoCueTimes, ResponseTimes, Choice, StimOnOffFreezeTimes, ContrastLR, FeedbackTimes, FeedbackType, @@ -732,7 +732,7 @@ class TrainingTrials(BaseBpodTrialsExtractor): var_names = ('repNum', 'goCueTrigger_times', 'stimOnTrigger_times', 'itiIn_times', 'stimOffTrigger_times', 'stimFreezeTrigger_times', 'errorCueTrigger_times', 'table', 'stimOff_times', 'stimFreeze_times', 'wheel_timestamps', 'wheel_position', 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', - 'peakVelocity_times', 'is_final_movement', 'phase', 'position', 'quiescence', 'pause_duration') + 'wheelMoves_peakVelocity_times', 'is_final_movement', 'phase', 'position', 'quiescence', 'pause_duration') def _extract(self) -> dict: base = [RepNum, GoCueTriggerTimes, StimOnTriggerTimes, ItiInTimes, StimOffTriggerTimes, StimFreezeTriggerTimes, diff --git a/ibllib/io/extractors/training_wheel.py b/ibllib/io/extractors/training_wheel.py index d9f97059f..fa4b2f300 100644 --- a/ibllib/io/extractors/training_wheel.py +++ b/ibllib/io/extractors/training_wheel.py @@ -330,11 +330,6 @@ def extract_first_movement_times(wheel_moves, trials, min_qt=None): gap between quiescence end and cue start, or during the quiescence period but sub- threshold). The movement is sufficiently large if it is greater than or equal to THRESH. 
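The hunks above swap the removed np.NaN alias for np.nan (the only spelling NumPy 2.0 still accepts). A minimal, self-contained sketch of the per-trial extraction pattern used in LaserBool._extract, with a made-up trial list:

    import numpy as np

    # Hypothetical bpod_trials list; real trials come from the Bpod jsonable loader.
    bpod_trials = [{'laser_stimulation': 1.0}, {}, {'laser_stimulation': 0.0}]
    # Missing keys fall back to np.nan (lowercase), matching the change above.
    lstim = np.array([float(t.get('laser_stimulation', np.nan)) for t in bpod_trials])
    # -> array([ 1., nan,  0.])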
- :param wheel_moves: - :param trials: dictionary of trial data - :param min_qt: - :return: numpy array of - Parameters ---------- wheel_moves : dict @@ -407,9 +402,9 @@ class Wheel(BaseBpodTrialsExtractor): save_names = ('_ibl_wheel.timestamps.npy', '_ibl_wheel.position.npy', '_ibl_wheelMoves.intervals.npy', '_ibl_wheelMoves.peakAmplitude.npy', None, '_ibl_trials.firstMovement_times.npy', None) - var_names = ('wheel_timestamps', 'wheel_position', 'wheelMoves_intervals', - 'wheelMoves_peakAmplitude', 'peakVelocity_times', 'firstMovement_times', - 'is_final_movement') + var_names = ('wheel_timestamps', 'wheel_position', + 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times', + 'firstMovement_times', 'is_final_movement') def _extract(self): ts, pos = get_wheel_position(self.session_path, self.bpod_trials, task_collection=self.task_collection) @@ -425,6 +420,5 @@ def _extract(self): min_qt = self.settings.get('QUIESCENT_PERIOD', None) first_moves, is_final, _ = extract_first_movement_times(moves, trials, min_qt=min_qt) - output = (ts, pos, moves['intervals'], moves['peakAmplitude'], - moves['peakVelocity_times'], first_moves, is_final) + output = (ts, pos, moves['intervals'], moves['peakAmplitude'], moves['peakVelocity_times'], first_moves, is_final) return output diff --git a/ibllib/io/session_params.py b/ibllib/io/session_params.py index c344795c9..e9127e9ae 100644 --- a/ibllib/io/session_params.py +++ b/ibllib/io/session_params.py @@ -27,6 +27,7 @@ import logging import socket from pathlib import Path +from itertools import chain from copy import deepcopy from one.converters import ConversionMixin @@ -77,6 +78,9 @@ def _patch_file(data: dict) -> dict: if 'tasks' in data and isinstance(data['tasks'], dict): data['tasks'] = [{k: v} for k, v in data['tasks'].copy().items()] data['version'] = SPEC_VERSION + # Ensure all items in tasks list are single value dicts + if 'tasks' in data: + data['tasks'] = [{k: v} for k, v in chain.from_iterable(map(dict.items, data['tasks']))] return data @@ -168,8 +172,19 @@ def merge_params(a, b, copy=False): assert k not in a or a[k] == b[k], 'multiple sync fields defined' if isinstance(b[k], list): prev = list(a.get(k, [])) - # For procedures and projects, remove duplicates - to_add = b[k] if k == 'tasks' else set(b[k]) - set(prev) + if k == 'tasks': + # For tasks, keep order and skip duplicates + # Assert tasks is a list of single value dicts + assert (not prev or set(map(len, prev)) == {1}) and set(map(len, b[k])) == {1} + # Convert protocol -> dict map to hashable tuple of protocol + sorted key value pairs + to_hashable = lambda itm: (itm[0], *chain.from_iterable(sorted(itm[1].items()))) # noqa + # Get the set of previous tasks + prev_tasks = set(map(to_hashable, chain.from_iterable(map(dict.items, prev)))) + tasks = chain.from_iterable(map(dict.items, b[k])) + to_add = [dict([itm]) for itm in tasks if to_hashable(itm) not in prev_tasks] + else: + # For procedures and projects, remove duplicates + to_add = set(b[k]) - set(prev) a[k] = prev + list(to_add) elif isinstance(b[k], dict): a[k] = {**a.get(k, {}), **b[k]} diff --git a/ibllib/oneibl/data_handlers.py b/ibllib/oneibl/data_handlers.py index 8c76a25cd..b0c40c735 100644 --- a/ibllib/oneibl/data_handlers.py +++ b/ibllib/oneibl/data_handlers.py @@ -15,13 +15,13 @@ from one.api import ONE from one.webclient import AlyxClient -from one.util import filter_datasets, ensure_list +from one.util import filter_datasets from one.alf.files import add_uuid_string, session_path_parts 
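For the merge_params change above, here is a standalone sketch of the task de-duplication idea, using made-up task entries: each task is a single-key dict mapping protocol name to parameters, and a hashable key is built from the protocol plus its sorted key/value pairs so repeated entries are skipped while order is preserved.

    from itertools import chain

    prev = [{'ephysChoiceWorld': {'collection': 'raw_task_data_00'}}]
    new = [{'ephysChoiceWorld': {'collection': 'raw_task_data_00'}},
           {'passiveChoiceWorld': {'collection': 'raw_task_data_01'}}]
    # Protocol name + sorted key/value pairs -> hashable tuple
    to_hashable = lambda itm: (itm[0], *chain.from_iterable(sorted(itm[1].items())))
    prev_keys = set(map(to_hashable, chain.from_iterable(map(dict.items, prev))))
    to_add = [dict([itm]) for itm in chain.from_iterable(map(dict.items, new))
              if to_hashable(itm) not in prev_keys]
    # to_add == [{'passiveChoiceWorld': {'collection': 'raw_task_data_01'}}]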
from one.alf.cache import _make_datasets_df -from iblutil.util import flatten +from iblutil.util import flatten, ensure_list from ibllib.oneibl.registration import register_dataset, get_lab, get_local_data_repository -from ibllib.oneibl.patcher import FTPPatcher, SDSCPatcher, SDSC_ROOT_PATH, SDSC_PATCH_PATH +from ibllib.oneibl.patcher import FTPPatcher, SDSCPatcher, SDSC_ROOT_PATH, SDSC_PATCH_PATH, S3Patcher _logger = logging.getLogger(__name__) @@ -747,6 +747,38 @@ def cleanUp(self): os.unlink(file) +class RemoteEC2DataHandler(DataHandler): + def __init__(self, session_path, signature, one=None): + """ + Data handler for running tasks on remote compute node. Will download missing data via http using ONE + + :param session_path: path to session + :param signature: input and output file signatures + :param one: ONE instance + """ + super().__init__(session_path, signature, one=one) + + def setUp(self): + """ + Function to download necessary data to run tasks using ONE + :return: + """ + df = super().getData() + self.one._check_filesystem(df) + + def uploadData(self, outputs, version, **kwargs): + """ + Function to upload and register data of completed task via S3 patcher + :param outputs: output files from task to register + :param version: ibllib version + :return: output info of registered datasets + """ + versions = super().uploadData(outputs, version) + s3_patcher = S3Patcher(one=self.one) + return s3_patcher.patch_dataset(outputs, created_by=self.one.alyx.user, + versions=versions, **kwargs) + + class RemoteHttpDataHandler(DataHandler): def __init__(self, session_path, signature, one=None): """ diff --git a/ibllib/oneibl/patcher.py b/ibllib/oneibl/patcher.py index 94d59a35e..22f682df4 100644 --- a/ibllib/oneibl/patcher.py +++ b/ibllib/oneibl/patcher.py @@ -33,14 +33,14 @@ import globus_sdk import iblutil.io.params as iopar -from one.alf.files import get_session_path, add_uuid_string +from iblutil.util import ensure_list +from one.alf.files import get_session_path, add_uuid_string, full_path_parts from one.alf.spec import is_uuid_string, is_uuid from one import params from one.webclient import AlyxClient from one.converters import path_from_dataset from one.remote import globus -from one.remote.aws import url2uri -from one.util import ensure_list +from one.remote.aws import url2uri, get_s3_from_alyx from ibllib.oneibl.registration import register_dataset @@ -633,3 +633,55 @@ def _scp(self, local_path, remote_path, dry=True): def _rm(self, flatiron_path, dry=True): raise PermissionError("This Patcher does not have admin permissions to remove data " "from the FlatIron server") + + +class S3Patcher(Patcher): + + def __init__(self, one=None): + assert one + super().__init__(one=one) + self.s3_repo = 's3_patcher' + self.s3_path = 'patcher' + + # Instantiate boto connection + self.s3, self.bucket = get_s3_from_alyx(self.one.alyx, repo_name=self.s3_repo) + + def check_datasets(self, file_list): + # Here we want to check if the datasets exist, if they do we don't want to patch unless we force. 
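A rough sketch of how the S3Patcher above acquires its boto3 handles, mirroring the get_s3_from_alyx call in __init__; the Alyx URL is a placeholder and a configured ONE instance with access to the 's3_patcher' repository is assumed.

    from one.api import ONE
    from one.remote.aws import get_s3_from_alyx

    one = ONE(base_url='https://alyx.example.org')  # hypothetical Alyx instance
    # Returns a boto3 S3 resource and the bucket name registered for the repository
    s3, bucket = get_s3_from_alyx(one.alyx, repo_name='s3_patcher')
    # _scp then calls s3.Bucket(bucket).upload_file(local_path, remote_key)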
+ exists = [] + for file in file_list: + collection = full_path_parts(file, as_dict=True)['collection'] + dset = self.one.alyx.rest('datasets', 'list', session=self.one.path2eid(file), name=file.name, + collection=collection, clobber=True) + if len(dset) > 0: + exists.append(file) + + return exists + + def patch_dataset(self, file_list, dry=False, ftp=False, force=False, **kwargs): + + exists = self.check_datasets(file_list) + if len(exists) > 0 and not force: + _logger.error(f'Files: {", ".join([f.name for f in file_list])} already exist, to force set force=True') + return + + response = super().patch_dataset(file_list, dry=dry, repository=self.s3_repo, ftp=False, **kwargs) + # TODO in an ideal case the flatiron filerecord won't be altered when we register this dataset. This requires + # changing the the alyx.data.register_view + for ds in response: + frs = ds['file_records'] + fr_server = next(filter(lambda fr: 'flatiron' in fr['data_repository'], frs)) + # Update the flatiron file record to be false + self.one.alyx.rest('files', 'partial_update', id=fr_server['id'], + data={'exists': False}) + + def _scp(self, local_path, remote_path, dry=True): + + aws_remote_path = Path(self.s3_path).joinpath(remote_path.relative_to(FLATIRON_MOUNT)) + _logger.info(f'Transferring file {local_path} to {aws_remote_path}') + self.s3.Bucket(self.bucket).upload_file(str(PurePosixPath(local_path)), str(PurePosixPath(aws_remote_path))) + + return 0, '' + + def _rm(self, *args, **kwargs): + raise PermissionError("This Patcher does not have admin permissions to remove data.") diff --git a/ibllib/oneibl/registration.py b/ibllib/oneibl/registration.py index 6f1a33f9f..2b410e3ef 100644 --- a/ibllib/oneibl/registration.py +++ b/ibllib/oneibl/registration.py @@ -12,8 +12,9 @@ from one.webclient import AlyxClient, no_cache from one.converters import ConversionMixin import one.alf.exceptions as alferr -from one.util import datasets2records, ensure_list from one.api import ONE +from one.util import datasets2records +from iblutil.util import ensure_list import ibllib import ibllib.io.extractors.base diff --git a/ibllib/pipes/base_tasks.py b/ibllib/pipes/base_tasks.py index c679139af..475f2b650 100644 --- a/ibllib/pipes/base_tasks.py +++ b/ibllib/pipes/base_tasks.py @@ -4,8 +4,7 @@ from packaging import version from one.webclient import no_cache -from one.util import ensure_list -from iblutil.util import flatten +from iblutil.util import flatten, ensure_list import matplotlib.image from skimage.io import ImageCollection, imread @@ -551,7 +550,7 @@ def register_snapshots(self, unlink=False, collection=None): snapshot = self._save_as_png(snapshot_tif := snapshot) if unlink: snapshot_tif.unlink() - _logger.debug('Uploading "%s"...', snapshot.relative_to(self.session_path)) + _logger.info('Uploading "%s"...', snapshot.relative_to(self.session_path)) if snapshot.with_suffix('.txt').exists(): with open(snapshot.with_suffix('.txt'), 'r') as txt_file: note['text'] = txt_file.read().strip() diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 5c2fc224f..c2c41bd03 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -490,8 +490,6 @@ def make_pipeline(session_path, **pkwargs): tasks[f'RawEphysQC_{pname}'] = type(f'RawEphysQC_{pname}', (etasks.RawEphysQC,), {})( **kwargs, **ephys_kwargs, pname=pname, parents=register_task) - tasks[f'EphysCellQC_{pname}'] = type(f'EphysCellQC_{pname}', (etasks.EphysCellsQc,), {})( - **kwargs, **ephys_kwargs, pname=pname, 
parents=[tasks[f'Spikesorting_{pname}']]) # Video tasks if 'cameras' in devices: diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index 4d794b19f..8596a1619 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -355,9 +355,9 @@ class EphysSyncPulses(SyncPulses): @property def signature(self): signature = { - 'input_files': [('*nidq.cbin', self.sync_collection, True), + 'input_files': [('*nidq.cbin', self.sync_collection, False), ('*nidq.ch', self.sync_collection, False), - ('*nidq.meta', self.sync_collection, True), + ('*nidq.meta', self.sync_collection, False), ('*nidq.wiring.json', self.sync_collection, True)], 'output_files': [('_spikeglx_sync.times.npy', self.sync_collection, True), ('_spikeglx_sync.polarities.npy', self.sync_collection, True), @@ -393,13 +393,19 @@ def __init__(self, *args, **kwargs): @property def signature(self): signature = { - 'input_files': [('*ap.meta', f'{self.device_collection}/{pname}', True) for pname in self.pname] + - [('*ap.cbin', f'{self.device_collection}/{pname}', True) for pname in self.pname] + - [('*ap.ch', f'{self.device_collection}/{pname}', True) for pname in self.pname] + - [('*ap.wiring.json', f'{self.device_collection}/{pname}', False) for pname in self.pname] + - [('_spikeglx_sync.times.npy', self.sync_collection, True), - ('_spikeglx_sync.polarities.npy', self.sync_collection, True), - ('_spikeglx_sync.channels.npy', self.sync_collection, True)], + 'input_files': + [('*ap.meta', f'{self.device_collection}/{pname}', True) for pname in self.pname] + + [('*ap.cbin', f'{self.device_collection}/{pname}', True) for pname in self.pname] + + [('*ap.ch', f'{self.device_collection}/{pname}', True) for pname in self.pname] + + [('*ap.wiring.json', f'{self.device_collection}/{pname}', False) for pname in self.pname] + + [('_spikeglx_sync.times.*npy', f'{self.device_collection}/{pname}', False) for pname in self.pname] + + [('_spikeglx_sync.polarities.*npy', f'{self.device_collection}/{pname}', False) for pname in self.pname] + + [('_spikeglx_sync.channels.*npy', f'{self.device_collection}/{pname}', False) for pname in self.pname] + + [('_spikeglx_sync.times.*npy', self.sync_collection, True), + ('_spikeglx_sync.polarities.*npy', self.sync_collection, True), + ('_spikeglx_sync.channels.*npy', self.sync_collection, True), + ('*ap.meta', self.sync_collection, True) + ], 'output_files': [(f'_spikeglx_sync.times.{pname}.npy', f'{self.device_collection}/{pname}', True) for pname in self.pname] + [(f'_spikeglx_sync.polarities.{pname}.npy', f'{self.device_collection}/{pname}', True) @@ -517,8 +523,22 @@ def compute_cell_qc(folder_alf_probe): df_units = pd.concat( [df_units, ks2_labels['ks2_label'].reindex(df_units.index)], axis=1) # save as parquet file - df_units.to_parquet(folder_alf_probe.joinpath("clusters.metrics.pqt")) - return folder_alf_probe.joinpath("clusters.metrics.pqt"), df_units, drift + df_units.to_parquet(file_metrics := folder_alf_probe.joinpath("clusters.metrics.pqt")) + + assert np.all((df_units['bitwise_fail'] == 0) == (df_units['label'] == 1)) # useless but sanity check for OW + + cok = df_units['bitwise_fail'] == 0 + sok = cok[spikes['clusters']].values + spikes['templates'] = spikes['templates'].astype(np.uint16) + spikes['clusters'] = spikes['clusters'].astype(np.uint16) + spikes['depths'] = spikes['depths'].astype(np.float32) + spikes['amps'] = spikes['amps'].astype(np.float32) + file_passing = folder_alf_probe.joinpath('passingSpikes.table.pqt') + df_spikes = pd.DataFrame(spikes) + 
df_spikes = df_spikes.iloc[sok, :].reset_index(drop=True) + df_spikes.to_parquet(file_passing) + + return [file_metrics, file_passing], df_units, drift def _label_probe_qc(self, folder_probe, df_units, drift): """ @@ -564,26 +584,87 @@ class SpikeSorting(base_tasks.EphysTask, CellQCMixin): priority = 60 job_size = 'large' force = True - + env = 'iblsorter' + _sortername = 'iblsorter' SHELL_SCRIPT = Path.home().joinpath( - "Documents/PYTHON/iblscripts/deploy/serverpc/iblsorter/run_iblsorter.sh" + f"Documents/PYTHON/iblscripts/deploy/serverpc/{_sortername}/sort_recording.sh" ) SPIKE_SORTER_NAME = 'iblsorter' - PYKILOSORT_REPO = Path.home().joinpath('Documents/PYTHON/SPIKE_SORTING/ibl-sorter') + SORTER_REPOSITORY = Path.home().joinpath('Documents/PYTHON/SPIKE_SORTING/ibl-sorter') @property def signature(self): signature = { - 'input_files': [('*ap.meta', f'{self.device_collection}/{self.pname}', True), - ('*ap.*bin', f'{self.device_collection}/{self.pname}', True), - ('*ap.ch', f'{self.device_collection}/{self.pname}', False), - ('*sync.npy', f'{self.device_collection}/{self.pname}', True)], - 'output_files': [('spike_sorting_pykilosort.log', f'spike_sorters/pykilosort/{self.pname}', True), - ('_iblqc_ephysTimeRmsAP.rms.npy', f'{self.device_collection}/{self.pname}', True), - ('_iblqc_ephysTimeRmsAP.timestamps.npy', f'{self.device_collection}/{self.pname}', True)] + 'input_files': [ + ('*ap.meta', f'{self.device_collection}/{self.pname}', True), + ('*ap.*bin', f'{self.device_collection}/{self.pname}', True), + ('*ap.ch', f'{self.device_collection}/{self.pname}', False), + ('*sync.npy', f'{self.device_collection}/{self.pname}', True) + ], + 'output_files': [ + # ./raw_ephys_data/probe00/ + ('_iblqc_ephysTimeRmsAP.rms.npy', f'{self.device_collection}/{self.pname}/', True), + ('_iblqc_ephysTimeRmsAP.timestamps.npy', f'{self.device_collection}/{self.pname}/', True), + ('_iblqc_ephysSaturation.samples.npy', f'{self.device_collection}/{self.pname}/', True), + # ./spike_sorters/iblsorter/probe00 + ('spike_sorting_iblsorter.log', f'spike_sorters/{self._sortername}/{self.pname}', True), + ('_kilosort_raw.output.tar', f'spike_sorters/{self._sortername}/{self.pname}/', True), + # ./alf/probe00/iblsorter + ('_kilosort_whitening.matrix.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('_phy_spikes_subset.channels.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('_phy_spikes_subset.spikes.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('_phy_spikes_subset.waveforms.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('channels.labels.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('channels.localCoordinates.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('channels.rawInd.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.amps.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.channels.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.depths.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.metrics.pqt', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.peakToTrough.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.uuids.csv', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.waveforms.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('clusters.waveformsChannels.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('drift.times.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('drift.um.npy', f'alf/{self.pname}/{self._sortername}/', True), + 
('drift_depths.um.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('passingSpikes.table.pqt', f'alf/{self.pname}/{self._sortername}/', True), + ('spikes.amps.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('spikes.clusters.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('spikes.depths.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('spikes.samples.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('spikes.templates.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('spikes.times.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('templates.amps.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('templates.waveforms.npy', f'alf/{self.pname}/{self._sortername}/', True), + ('templates.waveformsChannels.npy', f'alf/{self.pname}/{self._sortername}/', True), + ], } return signature + @property + def scratch_folder_run(self): + """ + Constructs a path to a temporary folder for the spike sorting output and scratch files + This is usually on a high performance drive, and we should factor around 2.5 times the uncompressed raw recording size + For a scratch drive at /mnt/h0 we would have the following temp dir: + /mnt/h0/iblsorter_1.8.0_CSHL071_2020-10-04_001_probe01/ + """ + # get the scratch drive from the shell script + if self.scratch_folder is None: + with open(self.SHELL_SCRIPT) as fid: + lines = fid.readlines() + line = [line for line in lines if line.startswith("SCRATCH_DRIVE=")][0] + m = re.search(r"\=(.*?)(\#|\n)", line)[0] + scratch_drive = Path(m[1:-1].strip()) + else: + scratch_drive = self.scratch_folder + assert scratch_drive.exists(), f"Scratch drive {scratch_drive} not found" + # get the version of the sorter + self.version = self._fetch_iblsorter_version(self.SORTER_REPOSITORY) + spikesorter_dir = f"{self.version}_{'_'.join(list(self.session_path.parts[-3:]))}_{self.pname}" + return scratch_drive.joinpath(spikesorter_dir) + @staticmethod def _sample2v(ap_file): md = spikeglx.read_meta_data(ap_file.with_suffix(".meta")) @@ -597,7 +678,7 @@ def _fetch_iblsorter_version(repo_path): return f"iblsorter_{iblsorter.__version__}" except ImportError: _logger.info('IBL-sorter not in environment, trying to locate the repository') - init_file = Path(repo_path).joinpath('ibl-sorter', '__init__.py') + init_file = Path(repo_path).joinpath('iblsorter', '__init__.py') try: with open(init_file) as fid: lines = fid.readlines() @@ -619,7 +700,7 @@ def _fetch_iblsorter_run_version(log_file): line = fid.readline() version = re.search('version (.*), output', line) version = version or re.search('version (.*)', line) # old versions have output, new have a version line - version = re.sub(r'\^\[{2}[0-9]+m', '', version.group(1)) # removes the coloring tags + version = re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', version.group(1)) return version def _run_iblsort(self, ap_file): @@ -629,9 +710,7 @@ def _run_iblsort(self, ap_file): (discontinued support for old spike sortings in the probe folder <1.5.5) :return: path of the folder containing ks2 spike sorting output """ - self.version = self._fetch_iblsorter_version(self.PYKILOSORT_REPO) - label = ap_file.parts[-2] # this is usually the probe name - sorter_dir = self.session_path.joinpath("spike_sorters", self.SPIKE_SORTER_NAME, label) + sorter_dir = self.session_path.joinpath("spike_sorters", self.SPIKE_SORTER_NAME, self.pname) self.FORCE_RERUN = False if not self.FORCE_RERUN: log_file = sorter_dir.joinpath(f"spike_sorting_{self.SPIKE_SORTER_NAME}.log") @@ -643,24 +722,15 @@ def 
_run_iblsort(self, ap_file): return sorter_dir else: self.FORCE_RERUN = True - # get the scratch drive from the shell script - with open(self.SHELL_SCRIPT) as fid: - lines = fid.readlines() - line = [line for line in lines if line.startswith("SCRATCH_DRIVE=")][0] - m = re.search(r"\=(.*?)(\#|\n)", line)[0] - scratch_drive = Path(m[1:-1].strip()) - assert scratch_drive.exists() - spikesorter_dir = f"{self.version}_{'_'.join(list(self.session_path.parts[-3:]))}_{self.pname}" - temp_dir = scratch_drive.joinpath(spikesorter_dir) - _logger.info(f"job progress command: tail -f {temp_dir} *.log") - temp_dir.mkdir(parents=True, exist_ok=True) + _logger.info(f"job progress command: tail -f {self.scratch_folder_run} *.log") + self.scratch_folder_run.mkdir(parents=True, exist_ok=True) check_nvidia_driver() try: # if pykilosort is in the environment, use the installed version within the task import iblsorter.ibl # noqa - iblsorter.ibl.run_spike_sorting_ibl(bin_file=ap_file, scratch_dir=temp_dir) + iblsorter.ibl.run_spike_sorting_ibl(bin_file=ap_file, scratch_dir=self.scratch_folder_run, delete=False) except ImportError: - command2run = f"{self.SHELL_SCRIPT} {ap_file} {temp_dir}" + command2run = f"{self.SHELL_SCRIPT} {ap_file} {self.scratch_folder_run}" _logger.info(command2run) process = subprocess.Popen( command2run, @@ -675,16 +745,13 @@ def _run_iblsort(self, ap_file): if process.returncode != 0: error_str = error.decode("utf-8").strip() # try and get the kilosort log if any - for log_file in temp_dir.rglob('*_kilosort.log'): + for log_file in self.scratch_folder_run.rglob('*_kilosort.log'): with open(log_file) as fid: log = fid.read() _logger.error(log) break raise RuntimeError(f"{self.SPIKE_SORTER_NAME} {info_str}, {error_str}") - - shutil.copytree(temp_dir.joinpath('output'), sorter_dir, dirs_exist_ok=True) - shutil.rmtree(temp_dir, ignore_errors=True) - + shutil.copytree(self.scratch_folder_run.joinpath('output'), sorter_dir, dirs_exist_ok=True) return sorter_dir def _run(self): @@ -698,34 +765,41 @@ def _run(self): """ efiles = spikeglx.glob_ephys_files(self.session_path.joinpath(self.device_collection, self.pname)) ap_files = [(ef.get("ap"), ef.get("label")) for ef in efiles if "ap" in ef.keys()] + assert len(ap_files) != 0, f"No ap file found for probe {self.session_path.joinpath(self.device_collection, self.pname)}" assert len(ap_files) == 1, f"Several bin files found for the same probe {ap_files}" ap_file, label = ap_files[0] out_files = [] - ks2_dir = self._run_iblsort(ap_file) # runs the sorter, skips if it already ran + sorter_dir = self._run_iblsort(ap_file) # runs the sorter, skips if it already ran + # convert the data to ALF in the ./alf/probeXX/SPIKE_SORTER_NAME folder probe_out_path = self.session_path.joinpath("alf", label, self.SPIKE_SORTER_NAME) shutil.rmtree(probe_out_path, ignore_errors=True) probe_out_path.mkdir(parents=True, exist_ok=True) ibllib.ephys.spikes.ks2_to_alf( - ks2_dir, + sorter_dir, bin_path=ap_file.parent, out_path=probe_out_path, bin_file=ap_file, ampfactor=self._sample2v(ap_file), ) - logfile = ks2_dir.joinpath(f"spike_sorting_{self.SPIKE_SORTER_NAME}.log") + logfile = sorter_dir.joinpath(f"spike_sorting_{self.SPIKE_SORTER_NAME}.log") if logfile.exists(): shutil.copyfile(logfile, probe_out_path.joinpath(f"_ibl_log.info_{self.SPIKE_SORTER_NAME}.log")) + # recover the QC files from the spike sorting output and copy them + for file_qc in sorter_dir.glob('_iblqc_*.npy'): + shutil.move(file_qc, file_qc_out := ap_file.parent.joinpath(file_qc.name)) + 
out_files.append(file_qc_out) # Sync spike sorting with the main behaviour clock: the nidq for 3B+ and the main probe for 3A out, _ = ibllib.ephys.spikes.sync_spike_sorting(ap_file=ap_file, out_path=probe_out_path) out_files.extend(out) # Now compute the unit metrics - self.compute_cell_qc(probe_out_path) + files_qc, df_units, drift = self.compute_cell_qc(probe_out_path) + out_files.extend(files_qc) # convert ks2_output into tar file and also register # Make this in case spike sorting is in old raw_ephys_data folders, for new # sessions it should already exist tar_dir = self.session_path.joinpath('spike_sorters', self.SPIKE_SORTER_NAME, label) tar_dir.mkdir(parents=True, exist_ok=True) - out = ibllib.ephys.spikes.ks2_to_tar(ks2_dir, tar_dir, force=self.FORCE_RERUN) + out = ibllib.ephys.spikes.ks2_to_tar(sorter_dir, tar_dir, force=self.FORCE_RERUN) out_files.extend(out) # run waveform extraction _logger.info("Running waveform extraction") @@ -733,28 +807,29 @@ def _run(self): clusters = alfio.load_object(probe_out_path, 'clusters', attribute=['channels']) channels = alfio.load_object(probe_out_path, 'channels') extract_wfs_cbin( - cbin_file=ap_file, + bin_file=ap_file, output_dir=probe_out_path, spike_samples=spikes['samples'], spike_clusters=spikes['clusters'], spike_channels=clusters['channels'][spikes['clusters']], - h=None, # todo the geometry needs to be set using the spikeglx object channel_labels=channels['labels'], max_wf=256, trough_offset=42, spike_length_samples=128, - chunksize_samples=int(3000), + chunksize_samples=int(30_000), n_jobs=None, wfs_dtype=np.float16, - preprocessing_steps=["phase_shift", - "bad_channel_interpolation", - "butterworth", - "car"] + preprocess_steps=["phase_shift", "bad_channel_interpolation", "butterworth", "car"], + scratch_dir=self.scratch_folder_run, ) + _logger.info(f"Cleaning up temporary folder {self.scratch_folder_run}") + shutil.rmtree(self.scratch_folder_run, ignore_errors=True) if self.one: eid = self.one.path2eid(self.session_path, query_type='remote') ins = self.one.alyx.rest('insertions', 'list', session=eid, name=label, query_type='remote') if len(ins) != 0: + _logger.info("Populating probe insertion with qc") + self._label_probe_qc(probe_out_path, df_units, drift) _logger.info("Creating SpikeSorting QC plots") plot_task = ApPlots(ins[0]['id'], session_path=self.session_path, one=self.one) _ = plot_task.run() @@ -772,39 +847,3 @@ def _run(self): out_files.extend(out) return out_files - - -class EphysCellsQc(base_tasks.EphysTask, CellQCMixin): - priority = 90 - job_size = 'small' - - @property - def signature(self): - signature = { - 'input_files': [('spikes.times.npy', f'alf/{self.pname}*', True), - ('spikes.clusters.npy', f'alf/{self.pname}*', True), - ('spikes.amps.npy', f'alf/{self.pname}*', True), - ('spikes.depths.npy', f'alf/{self.pname}*', True), - ('clusters.channels.npy', f'alf/{self.pname}*', True)], - 'output_files': [('clusters.metrics.pqt', f'alf/{self.pname}*', True)] - } - return signature - - def _run(self): - """ - Post spike-sorting quality control at the cluster level. 
- Outputs a QC table in the clusters ALF object and labels corresponding probes in Alyx - """ - files_spikes = Path(self.session_path).joinpath('alf', self.pname).rglob('spikes.times.npy') - folder_probes = [f.parent for f in files_spikes] - out_files = [] - for folder_probe in folder_probes: - try: - qc_file, df_units, drift = self.compute_cell_qc(folder_probe) - out_files.append(qc_file) - self._label_probe_qc(folder_probe, df_units, drift) - except Exception: - _logger.error(traceback.format_exc()) - self.status = -1 - continue - return out_files diff --git a/ibllib/pipes/mesoscope_tasks.py b/ibllib/pipes/mesoscope_tasks.py index 0f2bb0912..2547bdc96 100644 --- a/ibllib/pipes/mesoscope_tasks.py +++ b/ibllib/pipes/mesoscope_tasks.py @@ -34,7 +34,7 @@ from one.alf.spec import to_alf from one.alf.files import filename_parts, session_path_parts import one.alf.exceptions as alferr -from iblutil.util import flatten +from iblutil.util import flatten, ensure_list from iblatlas.atlas import ALLEN_CCF_LANDMARKS_MLAPDV_UM, MRITorontoAtlas from ibllib.pipes import base_tasks @@ -486,24 +486,42 @@ def _consolidate_exptQC(exptQC): numpy.array An array of frame indices where QC code != 0. """ - - # Merge and make sure same indexes have same names across all files - frameQC_names_list = [e['frameQC_names'] for e in exptQC] - frameQC_names_list = [{f: 0} if isinstance(f, str) else {f[i]: i for i in range(len(f))} - for f in frameQC_names_list] - frameQC_names = {k: v for d in frameQC_names_list for k, v in d.items()} - for d in frameQC_names_list: - for k, v in d.items(): - if frameQC_names[k] != v: - raise IOError(f'exptQC.mat files have different values for name "{k}"') - - frameQC_names = pd.DataFrame(sorted([(v, k) for k, v in frameQC_names.items()]), - columns=['qc_values', 'qc_labels']) - + # Create a new enumeration combining all unique QC labels. 
+ # 'ok' will always have an enum of 0, the rest are determined by order alone + qc_labels = ['ok'] + frame_qc = [] + for e in exptQC: + assert e.keys() >= set(['frameQC_names', 'frameQC_frames']) + # Initialize an NaN array the same size of frameQC_frames to fill with new enum values + frames = np.full(e['frameQC_frames'].shape, fill_value=np.nan) + # May be numpy array of str or a single str, in both cases we cast to list of str + names = list(ensure_list(e['frameQC_names'])) + # For each label for the old enum, populate initialized array with the new one + for name in names: + i_old = names.index(name) # old enumeration + name = name if len(name) else 'unknown' # handle empty array and empty str + try: + i_new = qc_labels.index(name) + except ValueError: + i_new = len(qc_labels) + qc_labels.append(name) + frames[e['frameQC_frames'] == i_old] = i_new + frame_qc.append(frames) # Concatenate frames - frameQC = np.concatenate([e['frameQC_frames'] for e in exptQC], axis=0) - bad_frames = np.where(frameQC != 0)[0] - return frameQC, frameQC_names, bad_frames + frame_qc = np.concatenate(frame_qc) + # If any NaNs left over, assign 'unknown' label + if (missing_name := np.isnan(frame_qc)).any(): + try: + i = qc_labels.index('unknown') + except ValueError: + i = len(qc_labels) + qc_labels.append('unknown') + frame_qc[missing_name] = i + frame_qc = frame_qc.astype(np.uint32) # case to uint + bad_frames, = np.where(frame_qc != 0) + # Convert labels to value -> label data frame + frame_qc_names = pd.DataFrame(list(enumerate(qc_labels)), columns=['qc_values', 'qc_labels']) + return frame_qc, frame_qc_names, bad_frames def get_default_tau(self): """ diff --git a/ibllib/pipes/tasks.py b/ibllib/pipes/tasks.py index 81992600a..14ead0fdd 100644 --- a/ibllib/pipes/tasks.py +++ b/ibllib/pipes/tasks.py @@ -85,10 +85,9 @@ from ibllib.oneibl import data_handlers from ibllib.oneibl.data_handlers import get_local_data_repository from ibllib.oneibl.registration import get_lab -from iblutil.util import Bunch, flatten +from iblutil.util import Bunch, flatten, ensure_list import one.params from one.api import ONE -from one.util import ensure_list from one import webclient import one.alf.io as alfio @@ -115,7 +114,7 @@ class Task(abc.ABC): env = None # the environment name within which to run the task (NB: the env is not activated automatically!) def __init__(self, session_path, parents=None, taskid=None, one=None, - machine=None, clobber=True, location='server', **kwargs): + machine=None, clobber=True, location='server', scratch_folder=None, **kwargs): """ Base task class :param session_path: session path @@ -126,7 +125,8 @@ def __init__(self, session_path, parents=None, taskid=None, one=None, :param clobber: whether or not to overwrite log on rerun :param location: location where task is run. 
Options are 'server' (lab local servers'), 'remote' (remote compute node, data required for task downloaded via one), 'AWS' (remote compute node, data required for task downloaded via AWS), - or 'SDSC' (SDSC flatiron compute node) # TODO 'Globus' (remote compute node, data required for task downloaded via Globus) + or 'SDSC' (SDSC flatiron compute node) + :param scratch_folder: optional: Path where to write intermediate temporary data :param args: running arguments """ self.taskid = taskid @@ -142,6 +142,7 @@ def __init__(self, session_path, parents=None, taskid=None, one=None, self.clobber = clobber self.location = location self.plot_tasks = [] # Plotting task/ tasks to create plot outputs during the task + self.scratch_folder = scratch_folder self.kwargs = kwargs @property @@ -222,7 +223,7 @@ def run(self, **kwargs): if self.gpu >= 1: if not self._creates_lock(): self.status = -2 - _logger.info(f'Job {self.__class__} exited as a lock was found') + _logger.info(f'Job {self.__class__} exited as a lock was found at {self._lock_file_path()}') new_log = log_capture_string.getvalue() self.log = new_log if self.clobber else self.log + new_log _logger.removeHandler(ch) @@ -435,7 +436,7 @@ def assert_expected_outputs(self, raise_error=True): return everything_is_fine, files - def assert_expected_inputs(self, raise_error=True): + def assert_expected_inputs(self, raise_error=True, raise_ambiguous=False): """ Check that all the files necessary to run the task have been are present on disk. @@ -470,7 +471,7 @@ def assert_expected_inputs(self, raise_error=True): for k, v in variant_datasets.items() if any(v)} _logger.error('Ambiguous input datasets found: %s', ambiguous) - if raise_error or self.location == 'sdsc': # take no chances on SDSC + if raise_ambiguous or self.location == 'sdsc': # take no chances on SDSC # This could be mitigated if loading with data OneSDSC raise NotImplementedError( 'Multiple variant datasets found. 
Loading for these is undefined.') @@ -529,6 +530,8 @@ def get_data_handler(self, location=None): dhandler = data_handlers.SDSCDataHandler(self, self.session_path, self.signature, one=self.one) elif location == 'popeye': dhandler = data_handlers.PopeyeDataHandler(self, self.session_path, self.signature, one=self.one) + elif location == 'ec2': + dhandler = data_handlers.RemoteEC2DataHandler(self.session_path, self.signature, one=self.one) else: raise ValueError(f'Unknown location "{location}"') return dhandler diff --git a/ibllib/plots/figures.py b/ibllib/plots/figures.py index 384042add..14a6bb554 100644 --- a/ibllib/plots/figures.py +++ b/ibllib/plots/figures.py @@ -18,6 +18,7 @@ import one.alf.io as alfio from one.alf.exceptions import ALFObjectNotFound from ibllib.io.video import get_video_frame, url_from_eid +from ibllib.oneibl.data_handlers import ExpectedDataset import spikeglx import neuropixel from brainbox.plot import driftmap @@ -387,7 +388,6 @@ def get_probe_signature(self): def get_signatures(self, **kwargs): files_spikes = Path(self.session_path).joinpath('alf').rglob('spikes.times.npy') folder_probes = [f.parent for f in files_spikes] - full_input_files = [] for sig in self.signature['input_files']: for folder in folder_probes: @@ -396,8 +396,9 @@ def get_signatures(self, **kwargs): self.input_files = full_input_files else: self.input_files = self.signature['input_files'] - self.output_files = self.signature['output_files'] + self.input_files = [ExpectedDataset.input(*i) for i in self.input_files] + self.output_files = [ExpectedDataset.output(*i) for i in self.output_files] class BadChannelsAp(ReportSnapshotProbe): diff --git a/ibllib/qc/base.py b/ibllib/qc/base.py index 90899ef72..4385aa463 100644 --- a/ibllib/qc/base.py +++ b/ibllib/qc/base.py @@ -85,7 +85,7 @@ def overall_outcome(outcomes: iter, agg=max) -> spec.QC: one.alf.spec.QC The overall outcome. 
""" - outcomes = filter(lambda x: x not in (None, np.NaN), outcomes) + outcomes = filter(lambda x: x not in (None, np.nan), outcomes) return agg(map(spec.QC.validate, outcomes)) def _set_eid_or_path(self, session_path_or_eid): diff --git a/ibllib/qc/camera.py b/ibllib/qc/camera.py index dedd6c9a5..6a8220e95 100644 --- a/ibllib/qc/camera.py +++ b/ibllib/qc/camera.py @@ -579,9 +579,9 @@ def check_dropped_frames(self, threshold=.1): size_diff = int(self.data['count'].size - self.data['video']['length']) strict_increase = np.all(np.diff(self.data['count']) > 0) if not strict_increase: - n_effected = np.sum(np.invert(strict_increase)) + n_affected = np.sum(np.invert(strict_increase)) _log.info(f'frame count not strictly increasing: ' - f'{n_effected} frames effected ({n_effected / strict_increase.size:.2%})') + f'{n_affected} frames affected ({n_affected / strict_increase.size:.2%})') return spec.QC.CRITICAL dropped = np.diff(self.data['count']).astype(int) - 1 pct_dropped = (sum(dropped) / len(dropped) * 100) diff --git a/ibllib/qc/task_extractors.py b/ibllib/qc/task_extractors.py index 2ef931984..4527e8745 100644 --- a/ibllib/qc/task_extractors.py +++ b/ibllib/qc/task_extractors.py @@ -15,7 +15,7 @@ 'response_times', 'rewardVolume', 'stimFreezeTrigger_times', 'stimFreeze_times', 'stimOffTrigger_times', 'stimOff_times', 'stimOnTrigger_times', 'stimOn_times', 'valveOpen_times', - 'wheel_moves_intervals', 'wheel_moves_peak_amplitude', + 'wheelMoves_intervals', 'wheelMoves_peakAmplitude', 'wheelMoves_peakVelocity_times', 'wheel_position', 'wheel_timestamps'] @@ -56,14 +56,10 @@ def rename_data(data): if 'valveOpen_times' not in data: data['valveOpen_times'] = data['feedback_times'].copy() data['valveOpen_times'][~correct] = np.nan - if 'wheel_moves_intervals' not in data and 'wheelMoves_intervals' in data: - data['wheel_moves_intervals'] = data.pop('wheelMoves_intervals') - if 'wheel_moves_peak_amplitude' not in data and 'wheelMoves_peakAmplitude' in data: - data['wheel_moves_peak_amplitude'] = data.pop('wheelMoves_peakAmplitude') data['correct'] = correct diff_fields = list(set(REQUIRED_FIELDS).difference(set(data.keys()))) for miss_field in diff_fields: - data[miss_field] = data['feedback_times'] * np.nan + data[miss_field] = None if miss_field.startswith('wheel') else data['feedback_times'] * np.nan if len(diff_fields): _logger.warning(f'QC extractor, missing fields filled with NaNs: {diff_fields}') return data diff --git a/ibllib/qc/task_metrics.py b/ibllib/qc/task_metrics.py index f5ff83437..86f0e4a9b 100644 --- a/ibllib/qc/task_metrics.py +++ b/ibllib/qc/task_metrics.py @@ -512,7 +512,7 @@ def compute(self, **kwargs): iti = (np.roll(data['stimOn_times'], -1) - data['stimOff_times'])[:-1] metric = np.r_[np.nan_to_num(iti, nan=np.inf), np.nan] - 1. 
passed[check] = np.abs(metric) <= 0.1 - passed[check][-1] = np.NaN + passed[check][-1] = np.nan metrics[check] = metric # Checks common to training QC @@ -1111,9 +1111,8 @@ def check_n_trial_events(data, **_): # test errorCueTrigger_times separately # stimFreeze_times fails often due to TTL flicker exclude = ['camera_timestamps', 'errorCueTrigger_times', 'errorCue_times', - 'firstMovement_times', 'peakVelocity_times', 'valveOpen_times', - 'wheel_moves_peak_amplitude', 'wheel_moves_intervals', 'wheel_timestamps', - 'wheel_intervals', 'stimFreeze_times'] + 'wheelMoves_peakVelocity_times', 'valveOpen_times', 'wheelMoves_peakAmplitude', + 'wheelMoves_intervals', 'wheel_timestamps', 'stimFreeze_times'] events = [k for k in data.keys() if k.endswith('_times') and k not in exclude] metric = np.zeros(data['intervals'].shape[0], dtype=bool) diff --git a/ibllib/tests/extractors/test_extractors.py b/ibllib/tests/extractors/test_extractors.py index 46bcd826d..0decc6880 100644 --- a/ibllib/tests/extractors/test_extractors.py +++ b/ibllib/tests/extractors/test_extractors.py @@ -3,13 +3,14 @@ import shutil import tempfile import unittest -import unittest.mock +from unittest.mock import patch, Mock, MagicMock from pathlib import Path import numpy as np import pandas as pd import one.alf.io as alfio +from ibllib.io.extractors.bpod_trials import get_bpod_extractor from ibllib.io.extractors import training_trials, biased_trials, camera from ibllib.io import raw_data_loaders as raw from ibllib.io.extractors.base import BaseExtractor @@ -530,13 +531,13 @@ def test_size_outputs(self): 'peakVelocity_times': np.array([1, 1])} function_name = 'ibllib.io.extractors.training_wheel.extract_wheel_moves' # Training - with unittest.mock.patch(function_name, return_value=mock_data): + with patch(function_name, return_value=mock_data): task, = get_trials_tasks(self.training_lt5['path']) trials, _ = task.extract_behaviour(save=True) trials = alfio.load_object(self.training_lt5['path'] / 'alf', object='trials') self.assertTrue(alfio.check_dimensions(trials) == 0) # Biased - with unittest.mock.patch(function_name, return_value=mock_data): + with patch(function_name, return_value=mock_data): task, = get_trials_tasks(self.biased_lt5['path']) trials, _ = task.extract_behaviour(save=True) trials = alfio.load_object(self.biased_lt5['path'] / 'alf', object='trials') @@ -679,7 +680,7 @@ def test_groom_pin_state(self): gpio['indices'][i + 1] = np.where(ts > rise + pulse_width)[0][0] gpio_, audio_, ts_ = camera.groom_pin_state(gpio, audio, ts) - self.assertEqual(audio, audio_, 'Audio dict shouldn\'t be effected') + self.assertEqual(audio, audio_, 'Audio dict shouldn\'t be affected') np.testing.assert_array_almost_equal(ts_[:4], [40., 40.016667, 40.033333, 40.05]) # Broken TTLs + extra TTL @@ -753,5 +754,33 @@ def test_attribute_times(self, display=False): camera.attribute_times(tsa, tsb, injective=False, take='closest') +class TestGetBpodExtractor(unittest.TestCase): + + def test_get_bpod_extractor(self): + # un-existing extractor should raise a value error + with self.assertRaises(ValueError): + get_bpod_extractor('', protocol='sdf', task_collection='raw_behavior_data') + # in this case this returns an ibllib.io.extractors.training_trials.TrainingTrials instance + extractor = get_bpod_extractor( + '', protocol='_trainingChoiceWorld', + task_collection='raw_behavior_data' + ) + self.assertTrue(isinstance(extractor, BaseExtractor)) + + def test_get_bpod_custom_extractor(self): + # here we'll mock a custom module with a custom 
+        DummyModule = MagicMock()
+        DummyExtractor = Mock(spec_set=BaseExtractor)
+        DummyModule.toto.return_value = DummyExtractor
+        base_module = 'ibllib.io.extractors.bpod_trials'
+        with patch(f'{base_module}.get_bpod_extractor_class', return_value='toto'), \
+                patch(f'{base_module}.importlib.import_module', return_value=DummyModule) as import_mock:
+            self.assertIs(get_bpod_extractor(''), DummyExtractor)
+        import_mock.assert_called_with('projects')
+        # Check raises when imported class not an extractor
+        DummyModule.toto.return_value = MagicMock(spec=dict)
+        self.assertRaisesRegex(ValueError, 'should be an Extractor class', get_bpod_extractor, '')
+
+
 if __name__ == '__main__':
     unittest.main(exit=False, verbosity=2)
diff --git a/ibllib/tests/test_io.py b/ibllib/tests/test_io.py
index 3ef9574e4..661c4d2a4 100644
--- a/ibllib/tests/test_io.py
+++ b/ibllib/tests/test_io.py
@@ -527,10 +527,22 @@ def test_read_yaml(self):
         self.assertCountEqual(self.fixture.keys(), data_keys)

     def test_patch_data(self):
+        """Test for session_params._patch_file function."""
         with patch(session_params.__name__ + '.SPEC_VERSION', '1.0.0'), \
                 self.assertLogs(session_params.__name__, logging.WARNING):
             data = session_params._patch_file({'version': '1.1.0'})
         self.assertEqual(data, {'version': '1.0.0'})
+        # Check tasks dicts separated into lists
+        unpatched = {'version': '0.0.1', 'tasks': {
+            'fooChoiceWorld': {1: '1'}, 'barChoiceWorld': {2: '2'}}}
+        data = session_params._patch_file(unpatched)
+        self.assertIsInstance(data['tasks'], list)
+        self.assertEqual([['fooChoiceWorld'], ['barChoiceWorld']], list(map(list, data['tasks'])))
+        # Check patching list of dicts with some containing more than 1 key
+        unpatched = {'tasks': [{'foo': {1: '1'}}, {'bar': {2: '2'}, 'baz': {3: '3'}}]}
+        data = session_params._patch_file(unpatched)
+        self.assertEqual(3, len(data['tasks']))
+        self.assertEqual([['foo'], ['bar'], ['baz']], list(map(list, data['tasks'])))

     def test_get_collections(self):
         collections = session_params.get_collections(self.fixture)
@@ -561,10 +573,16 @@ def test_merge_params(self):
         b = {'procedures': ['Imaging', 'Injection'], 'tasks': [{'fooChoiceWorld': {'collection': 'bar'}}]}
         c = session_params.merge_params(a, b, copy=True)
         self.assertCountEqual(['Imaging', 'Behavior training/tasks', 'Injection'], c['procedures'])
-        self.assertCountEqual(['passiveChoiceWorld', 'ephysChoiceWorld', 'fooChoiceWorld'], (list(x)[0] for x in c['tasks']))
+        self.assertEqual(['passiveChoiceWorld', 'ephysChoiceWorld', 'fooChoiceWorld'], [list(x)[0] for x in c['tasks']])
         # Ensure a and b not modified
         self.assertNotEqual(set(c['procedures']), set(a['procedures']))
         self.assertNotEqual(set(a['procedures']), set(b['procedures']))
+        # Test duplicate tasks skipped while order kept constant
+        d = {'tasks': [a['tasks'][1], {'ephysChoiceWorld': {'collection': 'raw_task_data_02', 'sync_label': 'nidq'}}]}
+        e = session_params.merge_params(c, d, copy=True)
+        expected = ['passiveChoiceWorld', 'ephysChoiceWorld', 'fooChoiceWorld', 'ephysChoiceWorld']
+        self.assertEqual(expected, [list(x)[0] for x in e['tasks']])
+        self.assertDictEqual({'collection': 'raw_task_data_02', 'sync_label': 'nidq'}, e['tasks'][-1]['ephysChoiceWorld'])
         # Test without copy
         session_params.merge_params(a, b, copy=False)
         self.assertCountEqual(['Imaging', 'Behavior training/tasks', 'Injection'], a['procedures'])
diff --git a/ibllib/tests/test_mesoscope.py b/ibllib/tests/test_mesoscope.py
index 97a2d85d2..45bd1cf14 100644
--- a/ibllib/tests/test_mesoscope.py
+++ b/ibllib/tests/test_mesoscope.py
@@ -97,6 +97,32 @@ def test_get_default_tau(self):
         subject_detail['genotype'].pop(1)
         self.assertEqual(self.task.get_default_tau(), 1.5)  # return the default value

+    def test_consolidate_exptQC(self):
+        """Test for MesoscopePreprocess._consolidate_exptQC method."""
+        exptQC = [
+            {'frameQC_names': np.array(['ok', 'PMT off', 'galvos fault', 'high signal'], dtype=object),
+             'frameQC_frames': np.array([0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4])},
+            {'frameQC_names': np.array(['ok', 'PMT off', 'foo', 'galvos fault', np.array([])], dtype=object),
+             'frameQC_frames': np.array([0, 0, 1, 1, 2, 2, 2, 2, 3, 4])},
+            {'frameQC_names': 'ok',  # check with single str instead of array
+             'frameQC_frames': np.array([0, 0])}
+        ]
+
+        # Check concatenation of the frame QC arrays
+        frame_qc, frame_qc_names, bad_frames = self.task._consolidate_exptQC(exptQC)
+        # Check frame_qc array
+        expected_frames = [
+            0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 5, 0, 0, 1, 1, 4, 4, 4, 4, 2, 5, 0, 0]
+        np.testing.assert_array_equal(expected_frames, frame_qc)
+        # Check bad_frames array
+        expected = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25]
+        np.testing.assert_array_equal(expected, bad_frames)
+        # Check frame_qc_names data frame
+        self.assertCountEqual(['qc_values', 'qc_labels'], frame_qc_names.columns)
+        self.assertEqual(list(range(6)), frame_qc_names['qc_values'].tolist())
+        expected = ['ok', 'PMT off', 'galvos fault', 'high signal', 'foo', 'unknown']
+        self.assertCountEqual(expected, frame_qc_names['qc_labels'].tolist())
+
     def test_setup_uncompressed(self):
         """Test set up behaviour when raw tifs present."""
         # Test signature when clobber = True
diff --git a/ibllib/tests/test_pipes.py b/ibllib/tests/test_pipes.py
index 56ef51e68..9383d6dad 100644
--- a/ibllib/tests/test_pipes.py
+++ b/ibllib/tests/test_pipes.py
@@ -38,8 +38,8 @@ def test_task_queue(self, lab_repo_mock):
         lab_repo_mock.return_value = 'foo_repo'
         tasks = [
             {'executable': 'ibllib.pipes.mesoscope_tasks.MesoscopePreprocess', 'priority': 80},
-            {'executable': 'ibllib.pipes.ephys_tasks.SpikeSorting', 'priority': SpikeSorting.priority},
-            {'executable': 'ibllib.pipes.base_tasks.RegisterRawDataTask', 'priority': RegisterRawDataTask.priority}
+            {'executable': 'ibllib.pipes.ephys_tasks.SpikeSorting', 'priority': SpikeSorting.priority},  # 60
+            {'executable': 'ibllib.pipes.base_tasks.RegisterRawDataTask', 'priority': RegisterRawDataTask.priority}  # 100
         ]
         alyx = mock.Mock(spec=AlyxClient)
         alyx.rest.return_value = tasks
@@ -49,10 +49,10 @@ def test_task_queue(self, lab_repo_mock):
         self.assertIn('foolab', alyx.rest.call_args.kwargs.get('django', ''))
         self.assertIn('foo_repo', alyx.rest.call_args.kwargs.get('django', ''))
         # Expect to return tasks in descending priority order, without mesoscope task (different env)
-        self.assertEqual([tasks[2], tasks[1]], queue)
+        self.assertEqual([tasks[2]], queue)
         # Expect only mesoscope task returned when relevant env passed
-        queue = local_server.task_queue(lab='foolab', alyx=alyx, env=('suite2p',))
-        self.assertEqual([tasks[0]], queue)
+        queue = local_server.task_queue(lab='foolab', alyx=alyx, env=('suite2p', 'iblsorter'))
+        self.assertEqual([tasks[0], tasks[1]], queue)
         # Expect no tasks as mesoscope task is a large job
         queue = local_server.task_queue(mode='small', lab='foolab', alyx=alyx, env=('suite2p',))
         self.assertEqual([], queue)
diff --git a/ibllib/tests/test_tasks.py b/ibllib/tests/test_tasks.py
index a76558944..31416b053 100644
--- a/ibllib/tests/test_tasks.py
+++ b/ibllib/tests/test_tasks.py
@@ -153,15 +153,9 @@ class TestPipelineAlyx(unittest.TestCase):

     def setUp(self) -> None:
         self.td = tempfile.TemporaryDirectory()
-        # ses = one.alyx.rest('sessions', 'list', subject=ses_dict['subject'],
-        #                     date_range=[ses_dict['start_time'][:10]] * 2,
-        #                     number=ses_dict['number'],
-        #                     no_cache=True)
-        # if len(ses):
-        #     one.alyx.rest('sessions', 'delete', ses[0]['url'][-36:])
-        # randomise number
-        ses_dict['number'] = np.random.randint(1, 30)
-        ses = one.alyx.rest('sessions', 'create', data=ses_dict)
+        self.ses_dict = ses_dict.copy()
+        self.ses_dict['number'] = np.random.randint(1, 999)
+        ses = one.alyx.rest('sessions', 'create', data=self.ses_dict)
         session_path = Path(self.td.name).joinpath(
             ses['subject'], ses['start_time'][:10], str(ses['number']).zfill(3))
         session_path.joinpath('alf').mkdir(exist_ok=True, parents=True)
diff --git a/release_notes.md b/release_notes.md
index 18e010af7..97aa02a8b 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,9 @@
+## Release Note 2.40.0
+
+### features
+- iblsorter >= 1.9 sorting tasks with waveform extraction and channel sorting
+- s3 patcher prototype
+
 ## Release Note 2.39.0

 ### features
@@ -12,6 +18,10 @@

 #### 2.39.1
 - Bugfix: brainbox.metrics.single_unit.quick_unit_metrics fix for indexing of n_spike_below2
+-
+#### 2.39.2
+- Bugfix: routing of protocol to extractor through the project repository checks that the
+target is indeed an extractor class.

 ## Release Note 2.38.0
diff --git a/requirements.txt b/requirements.txt
index 7524c22f3..e41bcda1f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,10 +23,10 @@ seaborn>=0.9.0
 tqdm>=4.32.1
 # ibl libraries
 iblatlas>=0.5.3
-ibl-neuropixel>=1.0.1
-iblutil>=1.11.0
+ibl-neuropixel>=1.5.0
+iblutil>=1.13.0
 mtscomp>=1.0.1
-ONE-api~=2.9.rc0
+ONE-api>=2.10
 phylib>=2.6.0
 psychofit
 slidingRP>=1.1.1 # steinmetz lab refractory period metrics