run_sorting_hat_test_data.sample.sh
#!/usr/bin/env bash
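# Sample driver for the Snakefile-sorting-hat bulk workflow against test data.
# Copy this file, fill in the path/to placeholders and empty credentials below,
# then run it with the working directory as the first argument, for example
# (illustrative values):
#   ACCESSIONS=E-MTAB-2812 ./run_sorting_hat_test_data.sample.sh /path/to/workdir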
# GOAL must be either 'reprocess' or 'recalculations' (validated below).
# Recalculations are not currently implemented for proteomics experiments.
GOAL='recalculations'
CLUSTER=${CLUSTER:-"SLURM"}
NEXPS=${NEXPS:-30}
NJOBS=${NJOBS:-10}
GTF=$( pwd )/test-data/gff
FORCEALL=${FORCEALL:-true}
RESTART_TIMES=3
SKIP_STEPS=$( pwd )/step_skip.yaml
SKIP_ACC_REPROC=$( pwd )/accession_skip_reprocess.yaml
TEMPLATE_METHODS_BASELINE=$( pwd )/baseline_atlas_methods_template.conf
TEMPLATE_METHODS_DIFFERENTIAL=$( pwd )/differential_atlas_methods_template.conf
ZOOMA_EXCLUSIONS=$( pwd )/zooma_exclusions.yml
RUN_DECONVOLUTION=$( pwd )/accession_deconvolution.yaml
DECONV_REF=$atlas_prod/deconvolution_references/YYYY-MM-DD/
ISL_DIR=path/to/isl_dir
ISL_GENOMES_REFERENCES=$ISL_GENOMES
IRAP_VERSIONS=path/to/atlasprod/irap_versions.mk
IRAP_CONTAINER=path/to/singularity/irap_container:vx.y.z.sif
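# Temporary working space: point TMP_DEFINED at an existing directory, otherwise
# (if left empty) a throwaway mktemp directory is used.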
TMP_DEFINED=path/to/tmp_dir
TMP_DIR=${TMP_DEFINED:-$(mktemp -d)}
# CHECK_SPECIES file should come from a clone of the atlas-config repo in Jenkins:
CHECK_SPECIES=$( pwd )/atlas-species-name-mapping.yaml
BIOENTITIES_PROPERTIES=$( pwd )/test-data/bioentity_properties
SORTING_HAT=${SORTING_HAT:-$( pwd )/Snakefile-sorting-hat}
LOG_HANDLER=${LOG_HANDLER:-$( pwd )/log_handler.py}
SN_CONDA_PREFIX=${SN_CONDA_PREFIX:-$( pwd )/conda_installs}
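# Pick the Snakemake profile and executor flag for the scheduler in use.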
if [ "$CLUSTER" = "LSF" ]; then
PROFILE_LINE="--profile lsf-profilename"
SLURM_OR_NONE=""
elif [ "$CLUSTER" = "SLURM" ]; then
PROFILE_LINE="--profile slurm-profilename"
SLURM_OR_NONE="--slurm"
else
echo "'$CLUSTER' must be either LSF or SLURM"
exit 1
fi
CLUSTER_CONFIG=${CLUSTER_CONFIG:-$( pwd )/cluster.yaml}
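# BIOSTUDIES_AE_PRIVACY_STATUS_FILE must be exported by the caller and point to an existing file.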
[ -n "${BIOSTUDIES_AE_PRIVACY_STATUS_FILE+x}" ] || { echo "Env var BIOSTUDIES_AE_PRIVACY_STATUS_FILE not defined." >&2; exit 1; }
if [ ! -f "$BIOSTUDIES_AE_PRIVACY_STATUS_FILE" ]; then
  echo "$BIOSTUDIES_AE_PRIVACY_STATUS_FILE does not exist" >&2
  exit 1
fi
CONDA_PREFIX_LINE="--conda-prefix $SN_CONDA_PREFIX"
export LOG_PATH=${LOG_PATH:-$( pwd )/sorting.log}
USUAL_SM_ERR_OUT=${USUAL_SM_ERR_OUT:-$( pwd )/snakemake.log}
# ISL database connection settings (Oracle client paths and credentials)
ORACLE_BASE=path/to/oracle_base
ORACLE_HOME=path/to/oracle_home
PYTHON_USER=
PYTHON_CONNECT_STRING=
PYTHON_PASSWORD=
PROT_MAGETABFILES=
# Optionally define either ACCESSIONS or SPECIES; if both are set, ACCESSIONS supersedes SPECIES.
# SPECIES=homo_sapiens:rattus_norvegicus
# ACCESSIONS=E-MTAB-2812
TOMCAT_HOST=
TOMCAT_HOST_USERNAME=
TOMCAT_HOST_PASSWORD=
###
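# Build the derived Snakemake options from the settings above and validate the requested GOAL.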
if [ "$FORCEALL" = true ]; then FORCE_ALL="--forceall"; else FORCE_ALL=""; fi
if [ "$GOAL" != 'reprocess' ] && [ "$GOAL" != 'recalculations' ]; then
echo "'$GOAL' is not a valid analysis for GXA"
exit 1
fi
if [ -z "${ACCESSIONS+x}" ]; then
  ACC=""
else
  ACC="accessions=$ACCESSIONS"
fi
if [ -z "${SPECIES+x}" ]; then
  SPE=""
else
  if [ -z "${ACCESSIONS+x}" ]; then
    SPE="species=$SPECIES"
  else
    SPE=""
  fi
fi
# Create the conda environments only (one Snakemake invocation per env YAML file).
for yaml_file in "$( pwd )"/envs/*; do
  echo "$yaml_file"
  snakemake --use-conda --conda-create-envs-only $CONDA_PREFIX_LINE $PROFILE_LINE \
    --conda-frontend mamba \
    --config \
    yamlFile=$yaml_file \
    -s Snakefile-create-conda-envs
done
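# The run executes from the working directory supplied as the first script argument.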
pushd "${1:?working directory argument required}"
rm -f "$LOG_PATH"
touch "$LOG_PATH"
# Stream the shared log to the console while the workflow runs.
tail -f "$LOG_PATH" &
start=$(date +%s)
echo "starting bulk $GOAL..."
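# Main sorting-hat run. Everything after --config is handed to the workflow;
# sm_options is presumably forwarded to the per-experiment Snakemake calls that
# the workflow launches (assumption based on its contents), while -j $NEXPS caps
# how many experiments the top-level workflow handles in parallel.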
snakemake $SLURM_OR_NONE --use-conda --conda-frontend mamba \
--log-handler-script $LOG_HANDLER \
$PROFILE_LINE \
$FORCE_ALL \
$CONDA_PREFIX_LINE \
--latency-wait 10 \
--keep-going \
--config $ACC $SPE gtf_dir=$GTF \
atlas_prod=path/to/atlasprod \
atlas_exps=path/to/atlasexps \
cluster_config=$CLUSTER_CONFIG \
deconv_ref=$DECONV_REF \
goal=$GOAL \
atlas_meta_config=path/to/supporting_files \
skip_steps_file=$SKIP_STEPS \
skip_accessions_reproc_file=$SKIP_ACC_REPROC \
run_deconv_file=$RUN_DECONVOLUTION \
methods_base=$TEMPLATE_METHODS_BASELINE \
methods_dif=$TEMPLATE_METHODS_DIFFERENTIAL \
zooma_exclusions=$ZOOMA_EXCLUSIONS \
isl_genomes=$ISL_GENOMES_REFERENCES \
isl_dir=$ISL_DIR \
irap_versions=$IRAP_VERSIONS \
irap_container=$IRAP_CONTAINER \
tmp_dir=$TMP_DIR \
priv_stat_file=$BIOSTUDIES_AE_PRIVACY_STATUS_FILE \
check_sp_file=$CHECK_SPECIES \
oracle_home=$ORACLE_HOME \
python_user=$PYTHON_USER \
python_connect_string=$PYTHON_CONNECT_STRING \
python_password=$PYTHON_PASSWORD \
tomcat_host_username=$TOMCAT_HOST_USERNAME \
tomcat_host_password=$TOMCAT_HOST_PASSWORD \
tomcat_host=$TOMCAT_HOST \
prot_magetabfiles=$PROT_MAGETABFILES \
sm_options="$SLURM_OR_NONE --use-conda --conda-frontend mamba --keep-going $PROFILE_LINE -j $NJOBS $CONDA_PREFIX_LINE $FORCE_ALL --restart-times $RESTART_TIMES " \
bioentities_properties=$BIOENTITIES_PROPERTIES -j $NEXPS -s $SORTING_HAT &> $USUAL_SM_ERR_OUT
end=$(date +%s)
echo "bulk '$GOAL' took: $(( end - start )) s"
# Give the background tail a moment to flush the last log lines, then stop it.
sleep 5
kill $(jobs -p)