Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Precomputed marker genes replacement for mv #26

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions bin/load_db_scxa_marker_genes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ set -e
# Resolve the directory holding this script so sibling files can be located
# regardless of the caller's working directory.
scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# Quoted so that a checkout path containing whitespace does not split into
# several arguments to `source`.
source "$scriptDir/db_scxa_common.sh"

# Directory with the SQL routines/templates that are piped to psql further down.
postgres_scripts_dir=$scriptDir/../postgres_routines

# Each setting is taken from the environment when already set; otherwise it
# falls back to the positional argument in the same order.
dbConnection=${dbConnection:-$1}
EXP_ID=${EXP_ID:-$2}
EXPERIMENT_MGENES_PATH=${EXPERIMENT_MGENES_PATH:-$3}
Expand Down Expand Up @@ -85,5 +87,9 @@ if [[ -z ${NUMBER_MGENES_FILES+x} || $NUMBER_MGENES_FILES -gt 0 ]]; then

rm $EXPERIMENT_MGENES_PATH/mgenesDataToLoad.csv

# Rebuild the precomputed marker gene tables for this experiment: the accession
# placeholder in the SQL template is replaced with $EXP_ID and the result is
# streamed to psql.
# ON_ERROR_STOP makes psql exit non-zero when the SQL fails; without it psql
# reading from stdin reports success even if the block errored, and the script
# would carry on to print the "Loading done" message.
# $dbConnection is intentionally left unquoted, exactly as before, in case
# callers pass more than one word in it.
echo "Precompute tables for marker genes queries..."
sed "s/<<ACCESSION>>/$EXP_ID/" "$postgres_scripts_dir/07-loading-marker-genes-precomputed.sql.template" \
  | psql -v ON_ERROR_STOP=1 $dbConnection

echo "Marker genes: Loading done for $EXP_ID..."
fi
4 changes: 0 additions & 4 deletions bin/refresh_materialised_views.sh

This file was deleted.

2 changes: 2 additions & 0 deletions bin/reindex_tables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ REINDEX TABLE scxa_marker_genes;
REINDEX TABLE scxa_cell_clusters;
REINDEX TABLE scxa_analytics;
REINDEX TABLE experiment;
-- REINDEX TABLE scxa_top_5_marker_genes_per_cluster;
-- REINDEX TABLE scxa_marker_gene_stats;

CLUSTER;
EOF
49 changes: 49 additions & 0 deletions postgres_routines/07-loading-marker-genes-precomputed.sql.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
-- Rebuilds the precomputed marker gene tables for one experiment.
--
-- This file is a template: the loader script substitutes the accession
-- placeholder in the DECLARE section before piping the file to psql.
--
-- For the given experiment the block
--   1. replaces its rows in scxa_top_5_marker_genes_per_cluster with the five
--      lowest-marker_probability genes of every (k, cluster_id) pair found in
--      scxa_marker_genes, and
--   2. replaces its rows in scxa_marker_gene_stats with the mean and median
--      expression of each of those genes in every cluster of the same k.
--
-- Each table is refreshed with DELETE followed by INSERT, so re-running the
-- block for the same accession does not accumulate duplicate rows.
DO $$

DECLARE
    exp_accession VARCHAR(255) := '<<ACCESSION>>';

BEGIN

    -- Step 1: top five marker genes per (k, cluster_id).
    DELETE FROM scxa_top_5_marker_genes_per_cluster
    WHERE experiment_accession = exp_accession;

    INSERT INTO scxa_top_5_marker_genes_per_cluster
        (r, experiment_accession, gene_id, k, cluster_id, marker_probability)
    SELECT
        ranked.r,
        ranked.experiment_accession,
        ranked.gene_id,
        ranked.k,
        ranked.cluster_id,
        ranked.marker_probability
    FROM (
        SELECT
            -- gene_id is a tie-breaker: without it, genes sharing the same
            -- marker_probability at the cutoff would be picked arbitrarily and
            -- the selection could change from one reload to the next.
            row_number() OVER (
                PARTITION BY mg.k, mg.cluster_id
                ORDER BY mg.marker_probability, mg.gene_id
            ) AS r,
            mg.experiment_accession,
            mg.gene_id,
            mg.k,
            mg.cluster_id,
            mg.marker_probability
        FROM scxa_marker_genes AS mg
        WHERE mg.experiment_accession = exp_accession
    ) AS ranked
    WHERE ranked.r <= 5;

    -- Step 2: expression statistics of the genes selected in step 1.
    DELETE FROM scxa_marker_gene_stats
    WHERE experiment_accession = exp_accession;

    INSERT INTO scxa_marker_gene_stats
        (experiment_accession, gene_id, k_where_marker, cluster_id_where_marker,
         cluster_id, marker_p_value, mean_expression, median_expression)
    SELECT
        analytics.experiment_accession,
        analytics.gene_id,
        markers.k AS k_where_marker,
        -- Cluster in which the gene was selected as a marker ...
        markers.cluster_id AS cluster_id_where_marker,
        -- ... versus the cluster whose cells are being summarised in this row.
        clusters.cluster_id,
        markers.marker_probability AS marker_p_value,
        avg(analytics.expression_level) AS mean_expression,
        percentile_cont(0.5::double precision)
            WITHIN GROUP (ORDER BY analytics.expression_level) AS median_expression
    FROM scxa_analytics AS analytics
    INNER JOIN scxa_top_5_marker_genes_per_cluster AS markers
        ON analytics.experiment_accession::text = markers.experiment_accession::text
        AND analytics.gene_id::text = markers.gene_id::text
        AND markers.experiment_accession = exp_accession
    -- Cells are matched to their cluster for the same k the marker belongs to.
    INNER JOIN scxa_cell_clusters AS clusters
        ON analytics.experiment_accession::text = clusters.experiment_accession::text
        AND analytics.cell_id::text = clusters.cell_id::text
        AND clusters.k = markers.k
        AND clusters.experiment_accession = exp_accession
    WHERE analytics.experiment_accession::text = exp_accession
    GROUP BY
        analytics.experiment_accession,
        analytics.gene_id,
        markers.k,
        markers.cluster_id,
        clusters.cluster_id,
        markers.marker_probability;

END $$;
26 changes: 26 additions & 0 deletions tests/marker-genes/01-optional-create-table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,29 @@ CREATE TABLE IF NOT EXISTS scxa_marker_genes
CONSTRAINT scxa_marker_genes_experiment_accession_gene_id_k_pk
PRIMARY KEY (experiment_accession, gene_id, k)
);

-- Precomputed table filled by 07-loading-marker-genes-precomputed.sql.template:
-- per experiment, the best-ranked marker genes of every (k, cluster_id) pair.
-- NOTE(review): the constraint name below is 80 characters long; PostgreSQL's
-- default identifier limit is 63 bytes, so the stored name is truncated.
-- Consider a shorter name if anything needs to refer to it.
CREATE TABLE IF NOT EXISTS scxa_top_5_marker_genes_per_cluster
(
    -- Rank of the gene inside its (k, cluster_id) group, as produced by row_number() in the loading template.
    r                    INTEGER          NOT NULL,
    experiment_accession VARCHAR(255)     NOT NULL,
    gene_id              VARCHAR(255)     NOT NULL,
    k                    INTEGER          NOT NULL,
    cluster_id           INTEGER          NOT NULL,
    marker_probability   DOUBLE PRECISION NOT NULL,
    CONSTRAINT scxa_top_5_marker_genes_per_cluster_experiment_accession_gene_id_k_cluster_id_pk
        PRIMARY KEY (experiment_accession, gene_id, k, cluster_id)
);

-- Precomputed table filled by 07-loading-marker-genes-precomputed.sql.template:
-- mean and median expression of each selected marker gene in every cluster.
CREATE TABLE IF NOT EXISTS scxa_marker_gene_stats
(
    experiment_accession    VARCHAR(255)     NOT NULL,
    gene_id                 VARCHAR(255)     NOT NULL,
    -- k and cluster in which the gene was selected as a marker.
    k_where_marker          INTEGER          NOT NULL,
    cluster_id_where_marker INTEGER          NOT NULL,
    -- Cluster whose cells the two expression statistics below summarise.
    cluster_id              INTEGER          NOT NULL,
    -- Loaded from the marker_probability column of the top-5 table.
    marker_p_value          DOUBLE PRECISION NOT NULL,
    mean_expression         FLOAT,
    median_expression       FLOAT,
    CONSTRAINT scxa_marker_gene_stats_experiment_accession_k_where_marker
        PRIMARY KEY (experiment_accession, gene_id, k_where_marker, cluster_id_where_marker, cluster_id)
);