From 865c644b336e6c77d32c7e85025f849d69612515 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Thu, 14 Nov 2024 13:28:31 -0600
Subject: [PATCH 1/6] feat(analysis): store subject position ethogram in
 `BlockSubjectPositionPlots.InROI` table

---
 aeon/dj_pipeline/analysis/block_analysis.py | 56 +++++++++++++--------
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/aeon/dj_pipeline/analysis/block_analysis.py b/aeon/dj_pipeline/analysis/block_analysis.py
index 67605344..9936f5b0 100644
--- a/aeon/dj_pipeline/analysis/block_analysis.py
+++ b/aeon/dj_pipeline/analysis/block_analysis.py
@@ -1331,12 +1331,21 @@ class BlockSubjectPositionPlots(dj.Computed):
     definition = """
     -> BlockSubjectAnalysis
     ---
-    ethogram_data: longblob  # ethogram data in record array format
     position_plot: longblob  # position plot (plotly)
     position_heatmap_plot: longblob  # position heatmap plot (plotly)
     position_ethogram_plot: longblob  # position ethogram plot (plotly)
     """
 
+    class InROI(dj.Part):
+        definition = """  # time spent in each ROI for each subject
+        -> master
+        subject_name: varchar(32)
+        roi_name: varchar(32)
+        ---
+        in_roi_time: float  # total seconds spent in this ROI for this block
+        in_roi_timestamps: longblob  # timestamps when a subject is at a specific ROI
+        """
+
     def make(self, key):
         """Compute and plot various block-level statistics and visualizations."""
         # Get some block info
@@ -1353,32 +1362,26 @@ def make(self, key):
         # Figure 1 - Position (centroid) over time
         # ---
         # Get animal position data
-        pose_query = (
-            streams.SpinnakerVideoSource
-            * tracking.SLEAPTracking.PoseIdentity.proj(
-                "identity_name", "identity_likelihood", part_name="anchor_part"
-            )
-            * tracking.SLEAPTracking.Part
-            & {"spinnaker_video_source_name": "CameraTop"}
-            & key
-            & chunk_restriction
-        )
-        centroid_df = fetch_stream(pose_query)[block_start:block_end]
+        pos_cols = {"x": "position_x", "y": "position_y", "time": "position_timestamps"}
+        centroid_df = (BlockAnalysis.Subject.proj(**pos_cols)
+                       & key).fetch(format="frame").reset_index()
+        centroid_df.drop(columns=["experiment_name", "block_start"], inplace=True)
+        centroid_df = centroid_df.explode(column=list(pos_cols))
+        centroid_df.set_index("time", inplace=True)
         centroid_df = (
-            centroid_df.groupby("identity_name")
+            centroid_df.groupby("subject_name")
             .resample("100ms")
            .first()
-            .droplevel("identity_name")
+            .droplevel("subject_name")
            .dropna()
            .sort_index()
         )
-        centroid_df.drop(columns=["spinnaker_video_source_name"], inplace=True)
         centroid_df["x"] = centroid_df["x"].astype(np.int32)
         centroid_df["y"] = centroid_df["y"].astype(np.int32)
 
         # Plot it
         position_fig = go.Figure()
-        for id_i, (id_val, id_grp) in enumerate(centroid_df.groupby("identity_name")):
+        for id_i, (id_val, id_grp) in enumerate(centroid_df.groupby("subject_name")):
             norm_time = (
                 (id_grp.index - id_grp.index[0]) / (id_grp.index[-1] - id_grp.index[0])
             ).values.round(3)
@@ -1407,7 +1410,7 @@ def make(self, key):
         # Calculate heatmaps
         max_x, max_y = int(centroid_df["x"].max()), int(centroid_df["y"].max())
         heatmaps = []
-        for id_val, id_grp in centroid_df.groupby("identity_name"):
+        for id_val, id_grp in centroid_df.groupby("subject_name"):
             # Add counts of x,y points to a grid that will be used for heatmap
             img_grid = np.zeros((max_x + 1, max_y + 1))
             points, counts = np.unique(id_grp[["x", "y"]].values, return_counts=True, axis=0)
@@ -1483,7 +1486,7 @@ def make(self, key):
         pos_eth_df = pd.DataFrame(
             columns=(["Subject"] + rois), index=centroid_df.index
         )  # df to create eth fig
-        pos_eth_df["Subject"] = centroid_df["identity_name"]
+        pos_eth_df["Subject"] = centroid_df["subject_name"]
 
         # For each ROI, compute if within ROI
         for roi in rois:
@@ -1558,10 +1561,23 @@ def make(self, key):
         ):
             entry[fig_name] = json.loads(fig.to_json())
 
-        melted_df.drop(columns=["Val"], inplace=True)
-        entry["ethogram_data"] = melted_df.to_records(index=False)
+        # Insert time spent in each ROI, per subject, into the InROI part table
+        in_roi_entries = []
+        for subject_name, roi_name in itertools.product(set(melted_df.Subject), rois):
+            df_ = melted_df[(melted_df["Subject"] == subject_name) & (melted_df["Loc"] == roi_name)]
+
+            roi_timestamps = df_["time"].values
+            roi_time = len(roi_timestamps) * 0.1  # 100ms per timestamp (centroid data resampled at 100ms)
+
+            in_roi_entries.append(
+                {**key, "subject_name": subject_name,
+                 "roi_name": roi_name,
+                 "in_roi_time": roi_time,
+                 "in_roi_timestamps": roi_timestamps}
+            )
 
         self.insert1(entry)
+        self.InROI.insert(in_roi_entries)
 
 
 # ---- Foraging Bout Analysis ----
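A minimal usage sketch for the new `InROI` part table follows; the block restriction below is hypothetical, while the table and attribute names come from the definition in this patch:

```python
from aeon.dj_pipeline.analysis.block_analysis import BlockSubjectPositionPlots

# Hypothetical block restriction -- substitute a real experiment/block key.
block_key = {"experiment_name": "social0.2-aeon4", "block_start": "2024-02-01 10:00:00"}

# One row per (subject, ROI) pair for this block.
in_roi_df = (BlockSubjectPositionPlots.InROI & block_key).fetch(format="frame").reset_index()

# Pivot into a subjects-by-ROIs table of seconds spent in each ROI.
summary = in_roi_df.pivot(index="subject_name", columns="roi_name", values="in_roi_time")
print(summary)
```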
centroid_df["identity_name"] + pos_eth_df["Subject"] = centroid_df["subject_name"] # For each ROI, compute if within ROI for roi in rois: @@ -1558,10 +1561,23 @@ def make(self, key): ): entry[fig_name] = json.loads(fig.to_json()) - melted_df.drop(columns=["Val"], inplace=True) - entry["ethogram_data"] = melted_df.to_records(index=False) + # insert into InROI + in_roi_entries = [] + for subject_name, roi_name in itertools.product(set(melted_df.Subject), rois): + df_ = melted_df[(melted_df["Subject"] == subject_name) & (melted_df["Loc"] == roi_name)] + + roi_timestamps = df_["time"].values + roi_time = len(roi_timestamps) * 0.1 # 100ms per timestamp + + in_roi_entries.append( + {**key, "subject_name": subject_name, + "roi_name": roi_name, + "in_roi_time": roi_time, + "in_roi_timestamps": roi_timestamps} + ) self.insert1(entry) + self.InROI.insert(in_roi_entries) # ---- Foraging Bout Analysis ---- From 41da2bbe54fe5cb6db1812ab243313b9778d3cdb Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 19 Nov 2024 16:54:22 -0600 Subject: [PATCH 2/6] feat(subject): subject experiment mapping from pyrat --- aeon/dj_pipeline/subject.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/aeon/dj_pipeline/subject.py b/aeon/dj_pipeline/subject.py index c7cd1dac..cf869c4f 100644 --- a/aeon/dj_pipeline/subject.py +++ b/aeon/dj_pipeline/subject.py @@ -373,6 +373,8 @@ def make(self, key): } ) + associate_subject_and_experiment(eartag_or_id) + completion_time = datetime.now(timezone.utc) self.insert1( { @@ -500,3 +502,28 @@ def get_pyrat_data(endpoint: str, params: dict = None, **kwargs): ) return response.json() + + +def associate_subject_and_experiment(subject_name): + """ + Check SubjectComment for experiment name for which the animal is participating in. + The expected comment format is "experiment: ". + E.g. "experiment: social0.3-aeon3" + Note: this function many need to run repeatedly to catch all experiments/animals. 
From b5be2eddebafbe370fcfc1256d436b4076cd7d69 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Tue, 19 Nov 2024 17:16:08 -0600
Subject: [PATCH 3/6] feat(sciviz): add additional block level meta information

---
 aeon/dj_pipeline/webapps/sciviz/specsheet.yaml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/aeon/dj_pipeline/webapps/sciviz/specsheet.yaml b/aeon/dj_pipeline/webapps/sciviz/specsheet.yaml
index 7c8793ec..20052ca9 100644
--- a/aeon/dj_pipeline/webapps/sciviz/specsheet.yaml
+++ b/aeon/dj_pipeline/webapps/sciviz/specsheet.yaml
@@ -669,8 +669,14 @@ SciViz:
                     return dict(**kwargs)
               dj_query: >
                 def dj_query(aeon_block_analysis):
-                    aeon_analysis = aeon_block_analysis
-                    query = aeon_analysis.Block * aeon_analysis.BlockAnalysis
+                    block_analysis = aeon_block_analysis
+                    query = block_analysis.Block.proj() * block_analysis.BlockAnalysis
+                    query *= block_analysis.BlockAnalysis.aggr(
+                        block_analysis.BlockAnalysis.Subject, subjects="GROUP_CONCAT(subject_name)", keep_all_rows=True)
+                    query *= block_analysis.BlockAnalysis.aggr(
+                        block_analysis.BlockAnalysis.Patch.proj(patch_rate="CONCAT(patch_name, ':', patch_rate, '(', patch_offset, ')')"), patch_rates="GROUP_CONCAT(patch_rate)", keep_all_rows=True)
+                    query *= block_analysis.BlockAnalysis.aggr(
+                        block_analysis.BlockAnalysis.Patch.proj(patch_pellet="CONCAT(patch_name, ':', pellet_count)"), patch_pellets="GROUP_CONCAT(patch_pellet)", keep_all_rows=True)
                     return dict(query=query, fetch_args=[])
           comp2:
             route: /per_block_patch_stats_plot
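The aggregation pattern embedded in the YAML above is easier to read as plain DataJoint; a rough equivalent of the first aggregated column, using the same names as the spec:

```python
from aeon.dj_pipeline.analysis import block_analysis

# keep_all_rows=True behaves like a LEFT JOIN: blocks with no
# BlockAnalysis.Subject rows are kept, with `subjects` left NULL.
query = block_analysis.Block.proj() * block_analysis.BlockAnalysis
query *= block_analysis.BlockAnalysis.aggr(
    block_analysis.BlockAnalysis.Subject,
    subjects="GROUP_CONCAT(subject_name)",
    keep_all_rows=True,
)
```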
From 507a0bc1edf04ea65bf93c0cc3829b1b449a4067 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Wed, 20 Nov 2024 08:57:15 -0600
Subject: [PATCH 4/6] Create fix_anchor_part_fullpose.py

---
 .../scripts/fix_anchor_part_fullpose.py      | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 aeon/dj_pipeline/scripts/fix_anchor_part_fullpose.py

diff --git a/aeon/dj_pipeline/scripts/fix_anchor_part_fullpose.py b/aeon/dj_pipeline/scripts/fix_anchor_part_fullpose.py
new file mode 100644
index 00000000..d7429ba1
--- /dev/null
+++ b/aeon/dj_pipeline/scripts/fix_anchor_part_fullpose.py
@@ -0,0 +1,64 @@
+"""
+Script to fix the anchor part of the fullpose SLEAP entries.
+See this commit: https://github.com/SainsburyWellcomeCentre/aeon_mecha/commit/8358ce4b6923918920efb77d09adc769721dbb9b
+
+Last run: ---
+"""
+import pandas as pd
+from tqdm import tqdm
+from aeon.dj_pipeline import acquisition, tracking, streams
+
+aeon_schemas = acquisition.aeon_schemas
+logger = acquisition.logger
+io_api = acquisition.io_api
+
+
+def update_anchor_part(key):
+    chunk_start, chunk_end = (acquisition.Chunk & key).fetch1("chunk_start", "chunk_end")
+
+    data_dirs = acquisition.Experiment.get_data_directories(key)
+
+    device_name = (streams.SpinnakerVideoSource & key).fetch1("spinnaker_video_source_name")
+
+    devices_schema = getattr(
+        aeon_schemas,
+        (acquisition.Experiment.DevicesSchema & {"experiment_name": key["experiment_name"]}).fetch1(
+            "devices_schema_name"
+        ),
+    )
+
+    stream_reader = getattr(getattr(devices_schema, device_name), "Pose")
+
+    # Special ingestion case for social0.2 full-pose data (using the Pose reader from social03).
+    # Full-pose data for social0.2 follows a different "pattern" than non-fullpose data, hence the Pose03 reader.
+    if key["experiment_name"].startswith("social0.2"):
+        from aeon.io import reader as io_reader
+        stream_reader = getattr(getattr(devices_schema, device_name), "Pose03")
+        assert isinstance(stream_reader, io_reader.Pose), "Pose03 is not a Pose reader"
+        data_dirs = [acquisition.Experiment.get_data_directory(key, "processed")]
+
+    pose_data = io_api.load(
+        root=data_dirs,
+        reader=stream_reader,
+        start=pd.Timestamp(chunk_start),
+        end=pd.Timestamp(chunk_end),
+    )
+
+    if not len(pose_data):
+        raise ValueError(f"No SLEAP data found for {key['experiment_name']} - {device_name}")
+
+    # Get the anchor part: the first body part with an "_x" column in the stream reader.
+    anchor_part = next(v.replace("_x", "") for v in stream_reader.columns if v.endswith("_x"))
+
+    # Update the anchor part for the PoseIdentity entries of this chunk only.
+    for entry in (tracking.SLEAPTracking.PoseIdentity & key).fetch("KEY"):
+        entry["anchor_part"] = anchor_part
+        tracking.SLEAPTracking.PoseIdentity.update1(entry)
+
+    logger.info(f"Anchor part updated to {anchor_part} for {key}")
+
+
+def main():
+    keys = tracking.SLEAPTracking.fetch("KEY")
+    for key in tqdm(keys):
+        update_anchor_part(key)

From e73c0992f6f77604245c0b55ca5bb7755f272239 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Wed, 20 Nov 2024 09:00:21 -0600
Subject: [PATCH 5/6] fix(subject): bugfix merge conflicts

---
 aeon/dj_pipeline/subject.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aeon/dj_pipeline/subject.py b/aeon/dj_pipeline/subject.py
index d3698b1a..52a524a4 100644
--- a/aeon/dj_pipeline/subject.py
+++ b/aeon/dj_pipeline/subject.py
@@ -375,7 +375,7 @@ def make(self, key):
 
         associate_subject_and_experiment(eartag_or_id)
 
-        completion_time = datetime.now(timezone.utc)
+        completion_time = datetime.now(UTC)
         self.insert1(
             {
                 **key,
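The backfill script from PATCH 4 would presumably be run once from an environment with pipeline database access; a minimal sketch:

```python
# One-off backfill; assumes DataJoint credentials for the pipeline are configured.
from aeon.dj_pipeline.scripts.fix_anchor_part_fullpose import main

main()  # iterates every SLEAPTracking key, rewriting anchor_part via update1()
```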
From fa53e236defa774480d7f4730067740d9f6a8443 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Thu, 21 Nov 2024 12:19:14 -0600
Subject: [PATCH 6/6] feat(block_analysis): improve BlockDetection logic to
 better track newly identified blocks

---
 aeon/dj_pipeline/analysis/block_analysis.py | 29 +++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/aeon/dj_pipeline/analysis/block_analysis.py b/aeon/dj_pipeline/analysis/block_analysis.py
index 2880b050..db3934a8 100644
--- a/aeon/dj_pipeline/analysis/block_analysis.py
+++ b/aeon/dj_pipeline/analysis/block_analysis.py
@@ -42,10 +42,18 @@ class Block(dj.Manual):
 
 @schema
 class BlockDetection(dj.Computed):
-    definition = """
+    definition = """  # Detecting new block(s) for each new Chunk
     -> acquisition.Environment
+    ---
+    execution_time=null: datetime
     """
 
+    class IdentifiedBlock(dj.Part):
+        definition = """  # the block(s) identified in this BlockDetection
+        -> master
+        -> Block
+        """
+
     key_source = acquisition.Environment - {"experiment_name": "social0.1-aeon3"}
 
     def make(self, key):
@@ -70,12 +78,9 @@ def make(self, key):
         block_state_query = acquisition.Environment.BlockState & exp_key & chunk_restriction
         block_state_df = fetch_stream(block_state_query)
         if block_state_df.empty:
-            self.insert1(key)
+            # no block state data yet; skip insertion so this chunk is revisited on the next populate
             return
 
-        block_state_df.index = block_state_df.index.round(
-            "us"
-        )  # timestamp precision in DJ is only at microseconds
         block_state_df = block_state_df.loc[
             (block_state_df.index > chunk_start) & (block_state_df.index <= chunk_end)
         ]
@@ -103,7 +108,10 @@ def make(self, key):
         )
 
         Block.insert(block_entries, skip_duplicates=True)
-        self.insert1(key)
+        self.insert1({**key, "execution_time": datetime.now(UTC)})
+        self.IdentifiedBlock.insert(
+            {**key, "block_start": entry["block_start"]} for entry in block_entries
+        )
 
 
 # ---- Block Analysis and Visualization ----
@@ -316,6 +324,15 @@ def make(self, key):
             if _df.type.iloc[-1] != "Exit":
                 subject_names.append(subject_name)
 
+        # Check ExperimentTimeline to validate the subjects present in this block
+        timeline_query = (acquisition.ExperimentTimeline
+                          & acquisition.ExperimentTimeline.Subject
+                          & key
+                          & f"start <= '{block_start}' AND end >= '{block_end}'")
+        timeline_subjects = (acquisition.ExperimentTimeline.Subject & timeline_query).fetch("subject")
+        if len(timeline_subjects):
+            subject_names = [s for s in subject_names if s in timeline_subjects]
+
         if use_blob_position and len(subject_names) > 1:
             raise ValueError(
                 f"Without SLEAPTracking, BlobPosition can only handle a single-subject block. "
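A hedged sketch of inspecting the new bookkeeping after a populate run; the experiment restriction is hypothetical, while table names follow the definitions in this patch:

```python
from aeon.dj_pipeline.analysis.block_analysis import Block, BlockDetection

exp_key = {"experiment_name": "social0.2-aeon4"}  # hypothetical experiment

# Blocks credited to a detection run, via the new IdentifiedBlock part table.
detected = (Block & (BlockDetection.IdentifiedBlock & exp_key)).fetch("KEY")

# Blocks present in Block but never linked to a detection run (antijoin).
unlinked = (Block & exp_key) - BlockDetection.IdentifiedBlock
print(len(detected), len(unlinked))
```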