-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Feature/add atlas data import #174
base: develop
Are you sure you want to change the base?
Changes from 12 commits
fb6003f
891e9e5
cc22b6e
e4756b0
04a9cf5
cb6b4b1
14a4e30
9db4b8b
8e0709b
eac6424
1125531
ec3ffc2
f6a8d0f
125cf46
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<tool id="atlas_import_classifiers" name="Atlas import: get classifiers" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <!--
        Galaxy wrapper around import_classifiers.R.
        The script receives the tool name typed by the user and a fixed output
        directory inside the job working directory; files in that directory that
        match the name-and-extension pattern are collected into the output list.
    -->
    <description>Import pre-trained classifiers from Single Cell Expression Atlas</description>
    <macros>
        <import>atlas_import_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## The output directory is a literal path relative to the job working
        ## directory: no input parameter named classifier_output_dir exists, so
        ## referencing it as a Cheetah variable would fail at template evaluation.
        mkdir -p imported_classifiers &&
        import_classifiers.R
            --tool '${tool}'
            --classifiers-output-dir imported_classifiers
        ## The config file is optional; only pass it when a dataset was selected.
        #if $config_file
            --config-file '${config_file}'
        #end if
    ]]></command>
    <inputs>
        <!-- optional="true" matches the conditional use of this parameter in the command -->
        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
        <param type="text" name="tool" label="Tool" help="For which tool should the classifiers be imported?" />
    </inputs>
    <outputs>
        <collection name="imported_classifiers" type="list" label="Collection of imported classifiers">
            <!-- must name the same directory that the command passes to the script -->
            <discover_datasets pattern="__name_and_ext__" directory="imported_classifiers" />
        </collection>
    </outputs>
    <help><![CDATA[
@HELP@

@VERSION_HISTORY@
    ]]></help>
    <expand macro="citations" />
</tool>
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
<tool id="atlas_import_experiment_data" name="Atlas import: get experiment data" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <!--
        Galaxy wrapper around get_experiment_data.R.
        The script is given an accession code and writes its results below a
        directory named after that accession; the command then moves each result
        onto the matching Galaxy output dataset.

        Review note (from the pull-request discussion): This should be all options
        of the main downloader I would say. Otherwise we clutter with an
        unnecessarily high number of tools.
    -->
    <description>Obtain study data from Single Cell Expression Atlas</description>
    <macros>
        <import>atlas_import_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        get_experiment_data.R
            --accession-code '${accession_code}'
            --matrix-type '${matrix_type}'
            --get-sdrf '${get_sdrf}'
            --get-condensed-sdrf '${get_condensed_sdrf}'
            --get-marker-genes '${get_marker_genes}'
        ## The config file is optional; only pass it when a dataset was selected.
        #if $config_file
            --config-file '${config_file}'
        #end if
        ## Review note (from the pull-request discussion): this for instance is
        ## already part of the main downloader I think.
        #if $decorated_rows
            --decorated-rows '${decorated_rows}'
        #end if
        #if $use_default_expr_names
            --use-default-expr-names '${use_default_expr_names}'
        #end if
        #if $get_idf
            --get-idf '${get_idf}'
        #end if
        #if $number_of_clusters
            --number-of-clusters '${number_of_clusters}'
        #end if

        ## The result paths depend on user input (accession code, cluster number).
        ## from_work_dir is a static path and is not template-expanded, so the
        ## files are moved onto the output datasets here instead.
        && mv '${accession_code}/10x_data/matrix.mtx' '${expr_mtx}'
        && mv '${accession_code}/10x_data/barcodes.tsv' '${barcodes}'
        && mv '${accession_code}/10x_data/genes.tsv' '${genes}'
        #if $get_sdrf
            && mv '${accession_code}/sdrf.txt' '${sdrf}'
        #end if
        #if $get_idf
            && mv '${accession_code}/idf.txt' '${idf}'
        #end if
        #if $get_marker_genes
            && mv '${accession_code}/marker_genes_${number_of_clusters}.tsv' '${marker_genes}'
        #end if
    ]]></command>
    <inputs>
        <param type="text" name="accession_code" label="Accession code" help="Accession code of dataset to be downloaded" />
        <param type="select" name="matrix_type" label="Matrix type" help="Type of matrix to be imported">
            <option value="raw">Raw</option>
            <option value="filtered">Filtered</option>
            <option value="tpm">TPM-normalised</option>
            <option value="cpm">CPM-normalised</option>
        </param>
        <param type="boolean" name="get_sdrf" checked="false" label="Import SDRF file" help="Boolean indicating whether SDRF file needs to be imported" />
        <param type="boolean" name="get_idf" checked="false" label="Import IDF file" help="Boolean indicating whether IDF file needs to be imported" />
        <param type="boolean" name="get_condensed_sdrf" checked="false" label="Get condensed SDRF file" help="Boolean indicating whether condensed SDRF file needs to be imported" />
        <param type="boolean" name="get_marker_genes" checked="false" label="Import marker genes" help="Boolean indicating whether marker genes should be imported" />
        <!-- optional="true" matches the conditional use of this parameter in the command -->
        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
        <param type="boolean" name="decorated_rows" checked="false" label="Decorated rows" help="Boolean indicating whether a decorated version of the rows should be imported" />
        <param type="boolean" name="use_default_expr_names" checked="false" label="Use default expr names" help="Should default (non 10x-type) file names be used for expression data? Default: FALSE" />
        <param type="integer" name="number_of_clusters" label="Number of clusters" help="Number of clusters in marker genes file" />
    </inputs>
    <outputs>
        <!-- Always produced: the three files of the 10x_data directory -->
        <data name="expr_mtx" format="txt" />
        <data name="barcodes" format="txt" />
        <data name="genes" format="txt" />
        <!-- Produced only when the corresponding boolean input is switched on -->
        <data name="sdrf" format="txt">
            <filter>get_sdrf==True</filter>
        </data>
        <data name="idf" format="txt">
            <filter>get_idf==True</filter>
        </data>
        <data name="marker_genes" format="txt">
            <filter>get_marker_genes==True</filter>
        </data>
    </outputs>
    <help><![CDATA[
@HELP@

@VERSION_HISTORY@
    ]]></help>
    <expand macro="citations" />
</tool>
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
<macros>
    <!-- Tokens substituted into the tool files that import this macro file -->
    <!-- NOTE(review): the tool version (1.0.0) differs from both the required
         package version (0.0.6) and the version-history entry below; confirm
         which one is intended. -->
    <token name="@TOOL_VERSION@">1.0.0</token>
    <token name="@HELP@">More information can be found at https://github.com/ebi-gene-expression-group/atlas-data-import</token>
    <token name="@PROFILE@">18.01</token>

    <!-- Package requirements; a caller may append further requirements through the yield -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="0.0.6">atlas-data-import</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- Prints the installed atlas-data-import version as listed by conda.
         The pattern is quoted so the shell passes it to grep untouched (no
         globbing, backslashes kept), and each component accepts several digits
         so that versions such as 0.0.10 are reported in full. -->
    <xml name="version">
        <version_command><![CDATA[
            conda list | grep atlas-data-import | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+'
        ]]></version_command>
    </xml>

    <token name="@VERSION_HISTORY@"><![CDATA[
**Version history**
0.0.6+galaxy0: Initial contribution. Andrey Solovyev, Expression Atlas team https://www.ebi.ac.uk/gxa/home at EMBL-EBI https://www.ebi.ac.uk/.
    ]]></token>

    <!-- Citation block; a caller may append further citations through the yield -->
    <xml name="citations">
        <citations>
            <!-- BibTeX separates authors with "and"; the braces keep the team name as one unit -->
            <citation type="bibtex">
                @misc{github-atlas-data-import.git,
                author = {Andrey Solovyev and {EBI Gene Expression Team}},
                year = {2020},
                title = {Scripts for extracting expression- and metadata from SCXA in a programmatic way},
                publisher = {GitHub},
                journal = {GitHub repository},
                url = {https://github.com/ebi-gene-expression-group/atlas-data-import.git},
                }
            </citation>
            <yield />
        </citations>
    </xml>
</macros>
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<tool id="atlas_import_sdrf_files" name="Atlas import: get sdrf files" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <!--
        Galaxy wrapper around import_sdrf_files.R.
        The script writes into a fixed directory inside the job working directory;
        files there that match the name-and-extension pattern are collected into
        the output list.

        Review discussion (from the pull request):
        Reviewer: I don't think that is worth having a whole tool just for this,
        I would add it to the main downloader with an optional activation switch.
        Author reply: @pcm32 I was thinking so too initially, but then decided to
        make a separate tool, because it's a different use case. In
        atlas-data-import, we specify single accession and import data. Here, the
        tool finds available classifiers for all datasets, unless specified
        otherwise, and imports them. Putting all this into one script will be a mess.
    -->
    <description>Import sdrf files from Single Cell Expression Atlas</description>
    <macros>
        <import>atlas_import_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## The output directory is a literal path relative to the job working
        ## directory: no input parameter named sdrf_output_dir exists, so
        ## referencing it as a Cheetah variable would fail at template evaluation.
        mkdir -p imported_SDRFs &&
        import_sdrf_files.R
            --sdrf-output-dir imported_SDRFs
        ## The config file is optional; only pass it when a dataset was selected.
        #if $config_file
            --config-file '${config_file}'
        #end if
        #if $get_condensed_sdrf
            --get-condensed-sdrf '${get_condensed_sdrf}'
        #end if
    ]]></command>
    <inputs>
        <!-- optional="true" matches the conditional use of this parameter in the command -->
        <param type="data" name="config_file" label="Config file" format="yml" optional="true" help="Config file with user-provided parameters" />
        <param type="boolean" name="get_condensed_sdrf" checked="false" label="Get condensed sdrf files" help="Boolean indicating whether condensed SDRF files should be imported" />
    </inputs>
    <outputs>
        <!-- Label corrected: this collection holds SDRF files, not classifiers -->
        <collection name="imported_sdrf_files" type="list" label="Collection of imported SDRF files">
            <!-- must name the same directory that the command passes to the script -->
            <discover_datasets pattern="__name_and_ext__" directory="imported_SDRFs" />
        </collection>
    </outputs>
    <help><![CDATA[
@HELP@

@VERSION_HISTORY@
    ]]></help>
    <expand macro="citations" />
</tool>
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
<macros>
    <!-- Tokens substituted into the tool files that import this macro file -->
    <!-- NOTE(review): the tool version (1.0.0) differs from both the required
         package version (0.0.10) and the version-history entry below (0.0.6);
         confirm which one is intended. -->
    <token name="@TOOL_VERSION@">1.0.0</token>
    <token name="@HELP@">More information can be found at https://github.com/ebi-gene-expression-group/atlas-data-import</token>
    <token name="@PROFILE@">18.01</token>

    <!-- Package requirements; a caller may append further requirements through the yield -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="0.0.10">atlas-data-import</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- Prints the installed atlas-data-import version as listed by conda.
         The pattern is quoted so the shell passes it to grep untouched (no
         globbing, backslashes kept), and each component accepts several digits:
         with single-digit components the required 0.0.10 would be reported
         as 0.0.1. -->
    <xml name="version">
        <version_command><![CDATA[
            conda list | grep atlas-data-import | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+'
        ]]></version_command>
    </xml>

    <token name="@VERSION_HISTORY@"><![CDATA[
**Version history**
0.0.6+galaxy0: Initial contribution. Andrey Solovyev, Expression Atlas team https://www.ebi.ac.uk/gxa/home at EMBL-EBI https://www.ebi.ac.uk/.
    ]]></token>

    <!-- Citation block; a caller may append further citations through the yield -->
    <xml name="citations">
        <citations>
            <!-- BibTeX separates authors with "and"; the braces keep the team name as one unit -->
            <citation type="bibtex">
                @misc{github-atlas-data-import.git,
                author = {Andrey Solovyev and {EBI Gene Expression Team}},
                year = {2020},
                title = {Scripts for extracting expression- and metadata from SCXA in a programmatic way},
                publisher = {GitHub},
                journal = {GitHub repository},
                url = {https://github.com/ebi-gene-expression-group/atlas-data-import.git},
                }
            </citation>
            <yield />
        </citations>
    </xml>
</macros>
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
<tool id="atlas_import_classifiers" name="Atlas import: get classifiers" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <!--
        Galaxy wrapper around import_classification_data.R.
        Classifiers are collected from the imported_classifiers directory.
        SDRF files (collected from imported_SDRFs) and the tool performance table
        are additional outputs, each enabled by its own boolean input.
    -->
    <description>Import pre-trained classifiers and SDRF files for a range of studies from Single Cell Expression Atlas</description>
    <macros>
        <import>atlas-retrieve-macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        import_classification_data.R
            --tool "${tool}"
        ## Optional config file: passed only when a dataset was selected.
        #if $config_file
            --config-file "${config_file}"
        #end if
        ## The condensed-SDRF switch lives inside the conditional, so it is only
        ## forwarded together with the SDRF import switch.
        #if $import_sdrf_files.get_sdrf
            --get-sdrf "${import_sdrf_files.get_sdrf}"
            --condensed-sdrf "${import_sdrf_files.condensed_sdrf}"
        #end if
        #if $get_tool_perf_table
            --get-tool-perf-table "${get_tool_perf_table}"
        #end if
        #if $tool_perf_table_output_path
            --tool-perf-table-output-path "${tool_perf_table_output_path}"
        #end if
    ]]></command>
    <inputs>
        <param name="config_file" type="data" format="yml" optional="true" label="Config file" help="Config file with user-provided parameters" />
        <param name="tool" type="text" label="Tool" help="For which tool should the classifiers be imported?" />
        <!-- The condensed option is only offered once SDRF import is switched on -->
        <conditional name="import_sdrf_files">
            <param name="get_sdrf" type="boolean" checked="false" label="Import SDRF files for the given experiments?" help="If specified, SDRF files will be imported"/>
            <when value="true">
                <param name="condensed_sdrf" type="boolean" checked="false" label="Import condensed version of SDRF files?" help="Should condensed SDRF files be imported? By default, normal version is imported." />
            </when>
            <when value="false">
                <param name="condensed_sdrf" type="hidden" value="NULL" />
            </when>
        </conditional>
        <param name="get_tool_perf_table" type="boolean" checked="false" label="Import tool performance table" help="If specified, tool performance table will be imported"/>
    </inputs>
    <outputs>
        <collection name="imported_classifiers" type="list" label="Collection of imported classifiers">
            <discover_datasets pattern="__name_and_ext__" directory="imported_classifiers" />
        </collection>
        <!-- Present only when SDRF import was requested -->
        <collection name="imported_sdrfs" type="list" label="Collection of imported SDRF files">
            <discover_datasets pattern="__name_and_ext__" directory="imported_SDRFs" />
            <filter>import_sdrf_files['get_sdrf']</filter>
        </collection>
        <!-- Present only when the performance table was requested -->
        <data name="tool_perf_table_output_path" format="tsv">
            <filter>get_tool_perf_table</filter>
        </data>
    </outputs>
    <help><![CDATA[
@HELP@

@VERSION_HISTORY@
    ]]></help>
    <expand macro="citations" />
</tool>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would suggest that we keep this in a separate branch. We cannot merge this and add it to the toolshed until classifier data is part of the Atlas SC release.