ebi-gene-expression-group · a-solovyev12 · Jun 18, 2020 · Jun 22, 2020 · Jun 25, 2020 · Jul 3, 2020
diff --git a/tools/tertiary-analysis/atlas-data-import/atlas_import_classifiers.xml b/tools/tertiary-analysis/atlas-data-import/atlas_import_classifiers.xml
@@ -0,0 +1,35 @@
+<tool id="atlas_import_classifiers" name="Atlas import: get classifiers" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Import pre-trained classifiers from Single Cell Expression Atlas</description>
+    <macros>
+         <import>atlas_import_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Fetches the classifiers for the selected tool into a fixed directory inside the
+         job working directory; the outputs section collects every file found there. -->
+    <command detect_errors="exit_code"><![CDATA[
+        ## The output directory is a fixed relative path: it is not a tool parameter,
+        ## and discover_datasets below needs a literal directory name.
+        mkdir -p imported_classifiers &&
+        import_classifiers.R --tool "${tool}" --classifiers-output-dir imported_classifiers
+
+        #if $config_file
+        --config-file "${config_file}"
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Optional, because the command only adds the config file flag when a dataset is supplied. -->
+        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
+        <param type="text" name="tool" label="Tool" help="For which tool should the classifiers be imported?" />
+    </inputs>
+    <outputs>
+        <collection name="imported_classifiers" type="list" label="Collection of imported classifiers">
+            <discover_datasets pattern="__name_and_ext__" directory="imported_classifiers" />
+        </collection>
+    </outputs>
+    <help><![CDATA[
+@HELP@
+
+@VERSION_HISTORY@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
diff --git a/tools/tertiary-analysis/atlas-data-import/atlas_import_experiment_data.xml b/tools/tertiary-analysis/atlas-data-import/atlas_import_experiment_data.xml
@@ -0,0 +1,92 @@
+<tool id="atlas_import_experiment_data" name="Atlas import: get experiment data" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Obtain study data from Single Cell Expression Atlas</description>
+    <macros>
+         <import>atlas_import_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Runs get_experiment_data.R for one accession, then moves the files it wrote under
+         the accession-named directory onto the matching Galaxy output datasets. -->
+    <command detect_errors="exit_code"><![CDATA[
+        get_experiment_data.R --accession-code "${accession_code}" --matrix-type "${matrix_type}" --get-sdrf "${get_sdrf}" --get-condensed-sdrf "${get_condensed_sdrf}" --get-marker-genes "${get_marker_genes}"
+
+        #if $config_file
+        --config-file "${config_file}"
+        #end if
+        #if $decorated_rows
+        --decorated-rows "${decorated_rows}"
+        #end if
+        #if $use_default_expr_names
+        --use-default-expr-names "${use_default_expr_names}"
+        #end if
+        #if $get_idf
+        --get-idf "${get_idf}"
+        #end if
+        #if str($number_of_clusters) != ''
+        --number-of-clusters "${number_of_clusters}"
+        #end if
+
+        ## Galaxy does not expand tool parameters inside from_work_dir, so the files
+        ## written under the accession-named directory are moved onto the output
+        ## datasets here instead.
+        ## NOTE(review): the 10x_data file names come from the original from_work_dir
+        ## values; confirm they still apply when use_default_expr_names is enabled.
+        && mv "${accession_code}/10x_data/matrix.mtx" "${expr_mtx}"
+        && mv "${accession_code}/10x_data/barcodes.tsv" "${barcodes}"
+        && mv "${accession_code}/10x_data/genes.tsv" "${genes}"
+        #if $get_sdrf
+        && mv "${accession_code}/sdrf.txt" "${sdrf}"
+        #end if
+        #if $get_idf
+        && mv "${accession_code}/idf.txt" "${idf}"
+        #end if
+        #if $get_marker_genes
+            #if str($number_of_clusters) != ''
+        && mv "${accession_code}/marker_genes_${number_of_clusters}.tsv" "${marker_genes}"
+            #else
+        ## NOTE(review): with no cluster count given this assumes the script writes exactly one marker_genes_*.tsv - confirm.
+        && mv "${accession_code}"/marker_genes_*.tsv "${marker_genes}"
+            #end if
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Restricted to letters, digits, '-' and '_' because the value is used as a directory name in the command. -->
+        <param type="text" name="accession_code" label="Accession code" help="Accession code of dataset to be downloaded">
+            <validator type="regex" message="Accession codes may only contain letters, digits, '-' and '_'">^[A-Za-z0-9_-]+$</validator>
+        </param>
+        <param type="select" name="matrix_type" label="Matrix type" help="Type of matrix to be imported">
+            <option value="raw">Raw</option>
+            <option value="filtered">Filtered</option>
+            <option value="tpm">TPM-normalised</option>
+            <option value="cpm">CPM-normalised</option>
+        </param>
+        <param type="boolean" name="get_sdrf" checked="false" label="Import SDRF file" help="Boolean indicating whether SDRF file needs to be imported" />
+        <param type="boolean" name="get_idf" checked="false" label="Import IDF file" help="Boolean indicating whether IDF file needs to be imported" />
+        <param type="boolean" name="get_condensed_sdrf" checked="false" label="Get condensed SDRF file" help="Boolean indicating whether condensed SDRF file needs to be imported" />
+        <param type="boolean" name="get_marker_genes" checked="false" label="Import marker genes" help="Boolean indicating whether marker genes should be imported" />
+        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
+        <param type="boolean" name="decorated_rows" checked="false" label="Decorated rows" help="Boolean indicating whether a decorated version of the rows should be imported" />
+        <param type="boolean" name="use_default_expr_names" checked="false" label="Use default expr names" help="Should default (non 10x-type) file names be used for expression data? Default: FALSE" />
+        <param type="integer" name="number_of_clusters" optional="true" label="Number of clusters" help="Number of clusters in marker genes file" />
+    </inputs>
+    <outputs>
+        <!-- No from_work_dir here: the command moves each result file onto its dataset path. -->
+        <data name="expr_mtx" format="txt" />
+        <data name="barcodes" format="txt" />
+        <data name="genes" format="txt" />
+        <data name="sdrf" format="txt">
+            <filter>get_sdrf==True</filter>
+        </data>
+        <data name="idf" format="txt">
+            <filter>get_idf==True</filter>
+        </data>
+        <data name="marker_genes" format="txt">
+            <filter>get_marker_genes==True</filter>
+        </data>
+    </outputs>
+    <help><![CDATA[
+@HELP@
+
+@VERSION_HISTORY@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
diff --git a/tools/tertiary-analysis/atlas-data-import/atlas_import_macros.xml b/tools/tertiary-analysis/atlas-data-import/atlas_import_macros.xml
@@ -0,0 +1,42 @@
+<macros>
+    <!-- Version of the Galaxy wrappers; each tool appends "+galaxy0" to it. -->
+    <token name="@TOOL_VERSION@">1.0.0</token>
+    <token name="@HELP@">More information can be found at https://github.com/ebi-gene-expression-group/atlas-data-import</token>
+    <token name="@PROFILE@">18.01</token>
+    <!-- Package requirements shared by the tools; a caller may yield additional requirement elements. -->
+    <xml name="requirements">
+      <requirements>
+        <requirement type="package" version="0.0.6">atlas-data-import</requirement>
+        <yield/>
+      </requirements>
+    </xml>
+    <!-- Prints the installed atlas-data-import version. The pattern is quoted so the shell
+         passes it to grep unchanged instead of stripping the backslashes or globbing it. -->
+    <xml name="version">
+      <version_command><![CDATA[
+        conda list | grep atlas-data-import | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+' | head -n 1
+    ]]></version_command>
+    </xml>
+    <!-- Rendered as reStructuredText in the tool help; the blank line keeps the heading a separate paragraph. -->
+    <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+1.0.0+galaxy0: Initial contribution. Andrey Solovyev, Expression Atlas team https://www.ebi.ac.uk/gxa/home at EMBL-EBI https://www.ebi.ac.uk/.
+    ]]></token>
+    <!-- Citation shared by the tools; a caller may yield additional citation elements. -->
+    <xml name="citations">
+      <citations>
+        <citation type="bibtex">
+          @misc{github-atlas-data-import.git,
+            author = {Andrey Solovyev and {EBI Gene Expression Team}},
+            year = {2020},
+            title = {Scripts for extracting expression- and metadata from SCXA in a programmatic way},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            url = {https://github.com/ebi-gene-expression-group/atlas-data-import.git}
+          }
+        </citation>
+        <yield />
+      </citations>
+    </xml>
+</macros>
diff --git a/tools/tertiary-analysis/atlas-data-import/atlas_import_sdrf_files.xml b/tools/tertiary-analysis/atlas-data-import/atlas_import_sdrf_files.xml
@@ -0,0 +1,38 @@
+<tool id="atlas_import_sdrf_files" name="Atlas import: get sdrf files" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Import sdrf files from Single Cell Expression Atlas</description>
+    <macros>
+         <import>atlas_import_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Fetches SDRF files into a fixed directory inside the job working directory;
+         the outputs section collects every file found there. -->
+    <command detect_errors="exit_code"><![CDATA[
+        ## The output directory is a fixed relative path: it is not a tool parameter,
+        ## and discover_datasets below needs a literal directory name.
+        mkdir -p imported_sdrf_files &&
+        import_sdrf_files.R --sdrf-output-dir imported_sdrf_files
+
+        #if $config_file
+        --config-file "${config_file}"
+        #end if
+        #if $get_condensed_sdrf
+        --get-condensed-sdrf "${get_condensed_sdrf}"
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Optional, because the command only adds the config file flag when a dataset is supplied. -->
+        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
+        <param type="boolean" name="get_condensed_sdrf" checked="false" label="Get condensed sdrf files" help="Boolean indicating whether condensed SDRF files should be imported" />
+    </inputs>
+    <outputs>
+        <collection name="imported_sdrf_files" type="list" label="Collection of imported SDRF files">
+            <discover_datasets pattern="__name_and_ext__" directory="imported_sdrf_files" />
+        </collection>
+    </outputs>
+    <help><![CDATA[
+@HELP@
+
+@VERSION_HISTORY@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
diff --git a/tools/tertiary-analysis/data-scxa/atlas-retrieve-macros.xml b/tools/tertiary-analysis/data-scxa/atlas-retrieve-macros.xml
@@ -0,0 +1,42 @@
+<macros>
+    <!-- Version of the Galaxy wrappers; each tool appends "+galaxy0" to it. -->
+    <token name="@TOOL_VERSION@">1.0.0</token>
+    <token name="@HELP@">More information can be found at https://github.com/ebi-gene-expression-group/atlas-data-import</token>
+    <token name="@PROFILE@">18.01</token>
+    <!-- Package requirements shared by the tools; a caller may yield additional requirement elements. -->
+    <xml name="requirements">
+      <requirements>
+        <requirement type="package" version="0.0.6">atlas-data-import</requirement>
+        <yield/>
+      </requirements>
+    </xml>
+    <!-- Prints the installed atlas-data-import version. The pattern is quoted so the shell
+         passes it to grep unchanged instead of stripping the backslashes or globbing it. -->
+    <xml name="version">
+      <version_command><![CDATA[
+        conda list | grep atlas-data-import | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+' | head -n 1
+    ]]></version_command>
+    </xml>
+    <!-- Rendered as reStructuredText in the tool help; the blank line keeps the heading a separate paragraph. -->
+    <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+1.0.0+galaxy0: Initial contribution. Andrey Solovyev, Expression Atlas team https://www.ebi.ac.uk/gxa/home at EMBL-EBI https://www.ebi.ac.uk/.
+    ]]></token>
+    <!-- Citation shared by the tools; a caller may yield additional citation elements. -->
+    <xml name="citations">
+      <citations>
+        <citation type="bibtex">
+          @misc{github-atlas-data-import.git,
+            author = {Andrey Solovyev and {EBI Gene Expression Team}},
+            year = {2020},
+            title = {Scripts for extracting expression- and metadata from SCXA in a programmatic way},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            url = {https://github.com/ebi-gene-expression-group/atlas-data-import.git}
+          }
+        </citation>
+        <yield />
+      </citations>
+    </xml>
+</macros>
diff --git a/tools/tertiary-analysis/data-scxa/atlas_import_experiment_data.xml b/tools/tertiary-analysis/data-scxa/atlas_import_experiment_data.xml
@@ -0,0 +1,174 @@
+<tool id="atlas_import_experiment_data" name="Atlas import: get experiment data" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Retrieves expression matrixes and metadata from EBI Single Cell Expression Atlas (SCXA)</description>
+    <macros>
+         <import>atlas-retrieve-macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Runs get_experiment_data.R for one accession, then moves the files it wrote under
+         the accession-named directory onto the matching Galaxy output datasets. -->
+    <command detect_errors="exit_code"><![CDATA[
+        get_experiment_data.R --accession-code "${accession_code}" --matrix-type "${matrix_type}" --get-sdrf "${get_sdrf}" --get-condensed-sdrf "${get_condensed_sdrf}" --get-marker-genes "${get_marker_genes}"
+
+        #if $config_file
+        --config-file "${config_file}"
+        #end if
+        #if $get_exp_design
+        --get-exp-design "${get_exp_design}"
+        #end if
+        #if $decorated_rows
+        --decorated-rows "${decorated_rows}"
+        #end if
+        #if $use_default_expr_names
+        --use-default-expr-names "${use_default_expr_names}"
+        #end if
+        #if $get_idf
+        --get-idf "${get_idf}"
+        #end if
+        #if str($number_of_clusters) != ''
+        --number-of-clusters "${number_of_clusters}"
+        #end if
+
+        ## Galaxy does not expand tool parameters inside from_work_dir, so the files
+        ## written under the accession-named directory are moved onto the output
+        ## datasets here instead.
+        ## NOTE(review): the 10x_data file names come from the original from_work_dir
+        ## values; confirm they still apply when use_default_expr_names is enabled.
+        && mv "${accession_code}/10x_data/matrix.mtx" "${expr_mtx}"
+        && mv "${accession_code}/10x_data/barcodes.tsv" "${barcodes}"
+        && mv "${accession_code}/10x_data/genes.tsv" "${genes}"
+        #if $get_sdrf
+        && mv "${accession_code}/sdrf.txt" "${sdrf}"
+        #end if
+        #if $get_idf
+        && mv "${accession_code}/idf.txt" "${idf}"
+        #end if
+        #if $get_marker_genes
+            #if str($number_of_clusters) != ''
+        && mv "${accession_code}/marker_genes_${number_of_clusters}.tsv" "${marker_genes}"
+            #else
+        ## NOTE(review): with no cluster count given this assumes the script writes exactly one marker_genes_*.tsv - confirm.
+        && mv "${accession_code}"/marker_genes_*.tsv "${marker_genes}"
+            #end if
+        #end if
+        #if $get_exp_design
+        && mv "${accession_code}/exp_design.tsv" "${exp_design}"
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Restricted to letters, digits, '-' and '_' because the value is used as a directory name in the command. -->
+        <param type="text" name="accession_code" label="SC-Atlas experiment accession" value="E-GEOD-100058" help="EBI Single Cell Atlas accession for the experiment that you want to retrieve.">
+            <validator type="regex" message="Accession codes may only contain letters, digits, '-' and '_'">^[A-Za-z0-9_-]+$</validator>
+        </param>
+        <param type="select" name="matrix_type" label="Choose the type of matrix to download" help="Type of matrix to be imported">
+            <option value="raw">Raw</option>
+            <option value="filtered">Filtered Counts</option>
+            <option value="tpm">TPM-normalised</option>
+            <option value="cpm">CPM-normalised</option>
+        </param>
+        <param type="boolean" name="get_sdrf" checked="false" label="Import SDRF file" help="Boolean indicating whether SDRF file needs to be imported" />
+        <param type="boolean" name="get_exp_design" checked="false" label="Import experiment design file" help="Boolean indicating whether experiment design file needs to be imported" />
+        <param type="boolean" name="get_idf" checked="false" label="Import IDF file" help="Boolean indicating whether IDF file needs to be imported" />
+        <param type="boolean" name="get_condensed_sdrf" checked="false" label="Get condensed SDRF file" help="Boolean indicating whether condensed SDRF file needs to be imported" />
+        <param type="boolean" name="get_marker_genes" checked="false" label="Import marker genes" help="Boolean indicating whether marker genes should be imported" />
+        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
+        <param type="boolean" name="decorated_rows" checked="false" label="Decorated rows" help="Boolean indicating whether a decorated version of the rows should be imported" />
+        <param type="boolean" name="use_default_expr_names" checked="false" label="Use default expr names" help="Should default (non 10x-type) file names be used for expression data? Default: FALSE" />
+        <param type="integer" name="number_of_clusters" optional="true" label="Number of clusters" help="Number of clusters in marker genes file" />
+    </inputs>
+    <outputs>
+        <!-- No from_work_dir here: the command moves each result file onto its dataset path. Labels use the accession_code parameter. -->
+        <data name="expr_mtx" format="txt" label="${tool.name} on ${on_string} ${accession_code} matrix.mtx (${matrix_type.value_label})" />
+        <data name="barcodes" format="txt" label="${tool.name} on ${on_string} ${accession_code} barcodes.tsv (${matrix_type.value_label})" />
+        <data name="genes" format="txt" label="${tool.name} on ${on_string} ${accession_code} genes.tsv (${matrix_type.value_label})" />
+        <data name="sdrf" format="txt" label="${tool.name} on ${on_string} ${accession_code} sdrf.txt (${matrix_type.value_label})">
+            <filter>get_sdrf</filter>
+        </data>
+        <data name="idf" format="txt" label="${tool.name} on ${on_string} ${accession_code} idf.txt (${matrix_type.value_label})">
+            <filter>get_idf</filter>
+        </data>
+        <data name="marker_genes" format="txt">
+            <filter>get_marker_genes</filter>
+        </data>
+        <data name="exp_design" format="txt">
+            <filter>get_exp_design</filter>
+        </data>
+    </outputs>
+    <help><![CDATA[
+=================================================================================
+Gene expression analysis in single cells across species and biological conditions
+=================================================================================
+
+Single Cell Expression Atlas supports research in single cell transcriptomics.
+The Atlas annotates publicly available single cell RNA-Seq experiments with
+ontology identifiers and re-analyses them using standardised pipelines available
+through iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of
+clusters of cells, their annotations and supports searches for gene expression
+within and across studies.
+
+For more information check https://www.ebi.ac.uk/gxa/sc/home
+
+EBI SCXA Data Retrieval
+-----------------------
+
+The data retrieval tool presented here allows the user to retrieve expression matrices
+and metadata for any public experiment available at EBI Single Cell Expression Atlas.
+
+To use it, simply set the accession for the desired experiment and choose the type of
+matrix that you want to download:
+
+:Raw counts:
+  Un-normalised, unfiltered version of the expression data.
+
+:Filtered counts:
+  This should be the default choice for running clustering and other analysis
+  methods where you will introduce scaling and normalization of the data. The filtering
+  is based on the quality control applied by iRAP prior to pseudo-alignment and quantification.
+
+:TPMs:
+  TPM stands for Transcripts Per Million, and as the name implies, this has been
+  already normalized/scaled. You should keep this in mind when using this data
+  on methods that will try to normalise data as part of their procedure. Due to technical
+  particularities in the current Atlas SC pipeline, TPMs available here are not filtered.
+  **Note: droplet databases won't have TPM data**
+
+:CPMs:
+  CPM stands for Counts Per Million. Like TPMs, these matrices are already normalised/scaled. You should keep this in mind when using this data on methods that will try to normalise data as part of their procedure.
+
+Outputs will be:
+
+:Matrix (txt):
+  Contains the expression values for genes (rows) and samples/runs/cells (columns),
+  in the form selected above (raw counts, filtered counts, TPMs or CPMs). This
+  text file is formatted as a Matrix Market file, and as such it is accompanied by
+  separate files for the gene identifiers and the samples/runs/cells identifiers.
+
+:Genes (tsv):
+  Identifiers (column repeated) for the genes present in the matrix of expression,
+  in the same order as the matrix rows.
+
+:Barcodes (tsv):
+  Identifiers for the cells, samples or runs of the data matrix. The file is ordered
+  to match the columns of the matrix.
+
+Optional outputs:
+
+:Experiment Design file (tsv):
+  Contains metadata for the different cells/samples/runs of the experiment.
+  Please note that this file is generated before the filtering step, and while not
+  often, it might be the case that it contains more cells/samples/runs than the matrix.
+
+:SDRF file (txt):
+  Similar to Experiment Design file, contains information on individual cells/sequencing runs. Might contain information on technical duplicates.
+
+:IDF file (txt):
+  IDF file holds general information about the sequencing experiment and interpretation of the fields in SDRF/metadata files.
+
+:Marker gene file (txt):
+  File containing information on marker genes that differentiate cell types present in the sequencing experiment.
+
+@HELP@
+
+@VERSION_HISTORY@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
diff --git a/tools/tertiary-analysis/data-scxa/retrieve-classifiers.xml b/tools/tertiary-analysis/data-scxa/retrieve-classifiers.xml
@@ -0,0 +1,35 @@
+<tool id="atlas_import_classifiers" name="Atlas import: get classifiers" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Import pre-trained classifiers from Single Cell Expression Atlas</description>
+    <macros>
+         <import>atlas-retrieve-macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Fetches the classifiers for the selected tool into a fixed directory inside the
+         job working directory; the outputs section collects every file found there. -->
+    <command detect_errors="exit_code"><![CDATA[
+        ## The output directory is a fixed relative path: it is not a tool parameter,
+        ## and discover_datasets below needs a literal directory name.
+        mkdir -p imported_classifiers &&
+        import_classifiers.R --tool "${tool}" --classifiers-output-dir imported_classifiers
+
+        #if $config_file
+        --config-file "${config_file}"
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Optional, because the command only adds the config file flag when a dataset is supplied. -->
+        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
+        <param type="text" name="tool" label="Tool" help="For which tool should the classifiers be imported?" />
+    </inputs>
+    <outputs>
+        <collection name="imported_classifiers" type="list" label="Collection of imported classifiers">
+            <discover_datasets pattern="__name_and_ext__" directory="imported_classifiers" />
+        </collection>
+    </outputs>
+    <help><![CDATA[
+@HELP@
+
+@VERSION_HISTORY@
+    ]]></help>
+    <expand macro="citations" />
+</tool>