Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Additions to avoid hard coded paths in atlas for condensed SDRF calling #26

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions applyAllFixesForExperiment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env bash


# Print the one-line command synopsis for this script on standard error.
usage() {
    printf 'Usage: %s <accession> path-to-atlas-exps\n' "$0" >&2
}

# The experiment accession is the first positional argument; it is kept under
# both names because the rest of the script refers to it as $exp.
expAcc="$1"
exp="$1"

# Without an accession there is nothing to fix: show the synopsis and stop.
[[ -n "$expAcc" ]] || { usage; exit 1; }

# Absolute path of the directory holding this script; applyFixes reads the
# fixes files from here.
scriptDir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "Applying fixes for $exp ..."

# Applies fixes encoded in $fixesFile to $exp.$fileTypeToBeFixed.txt
applyFixes() {
exp=$1
fixesFile=$2
fileTypeToBeFixed=$3

# Apply factor type fixes in ${fileTypeToBeFixed} file
for l in $(cat $scriptDir/$fixesFile | sed 's|[[:space:]]*$||g');
do
if [ ! -s "$exp/$exp.${fileTypeToBeFixed}" ]; then
echo "ERROR: $exp/$exp.${fileTypeToBeFixed} not found or is empty" >&2
return 1
fi
echo $l | grep -P '\t' > /dev/null
if [ $? -ne 0 ]; then
echo "WARNING: line: '$l' in automatic_fixes_properties.txt is missing a tab character - not applying the fix "
fi
correct=`echo $l | awk -F"\t" '{print $1}'`
toBeReplaced=`echo $l | awk -F"\t" '{print $2}' | sed 's/[^-A-Za-z0-9_ ]/\\\&/g'`

if [ "$fixesFile" == "automatic_fixes_properties.txt" ]; then
# in sdrf or condensed-sdrv fix factor/characteristic types only
#if [ "$fileTypeToBeFixed" == "sdrf.txt" ]; then
#perl -pi -e "s|\[${toBeReplaced}\]|[${correct}]|g" $exp/$exp.${fileTypeToBeFixed}
if [ "$fileTypeToBeFixed" == "condensed-sdrf.tsv" ]; then
# In condensed-sdrf, the factor/characteristic type is the penultimate column - so tabs on both sides
perl -pi -e "s|\t${toBeReplaced}\t|\t${correct}\t|g" $exp/$exp.${fileTypeToBeFixed}
else
# idf
perl -pi -e "s|\t${toBeReplaced}\t|\t${correct}\t|g" $exp/$exp.${fileTypeToBeFixed}
perl -pi -e "s|\t${toBeReplaced}$|\t${correct}|g" $exp/$exp.${fileTypeToBeFixed}
fi
elif [ "$fixesFile" == "automatic_fixes_values.txt" ]; then
#if [ "$fileTypeToBeFixed" == "sdrf.txt" ]; then
#perl -pi -e "s|\t${toBeReplaced}\t|\t${correct}\t|g" $exp/$exp.${fileTypeToBeFixed}
#perl -pi -e "s|\t${toBeReplaced}$|\t${correct}|g" $exp/$exp.${fileTypeToBeFixed}
if [ "$fileTypeToBeFixed" == "condensed-sdrf.tsv" ]; then
# In condensed-sdrf, the factor/characteristic value is the last column - so tab on the left and line ending on the right
perl -pi -e "s|\t${toBeReplaced}$|\t${correct}|g" $exp/$exp.${fileTypeToBeFixed}
fi
fi
done
}

# The three steps below abort the script with exit status 1 on the first
# failure. `exit` is used rather than `return` because this is top-level code
# in an executed script: `return` is only valid inside a function or a sourced
# file, so it would print an error and let the script carry on.

# Apply factor type fixes in idf file
if ! applyFixes "$exp" automatic_fixes_properties.txt idf.txt; then
    echo "ERROR: Applying factor type fixes in idf file for $exp failed" >&2
    exit 1
fi

# Apply factor/sample characteristic type fixes to the condensed-sdrf file
if ! applyFixes "$exp" automatic_fixes_properties.txt condensed-sdrf.tsv; then
    echo "ERROR: Applying sample characteristic/factor types fixes in sdrf file for $exp failed" >&2
    exit 1
fi

# Apply sample characteristic/factor value fixes to the condensed-sdrf file
if ! applyFixes "$exp" automatic_fixes_values.txt condensed-sdrf.tsv; then
    echo "ERROR: Applying sample characteristic/factor value fixes in sdrf file for $exp failed" >&2
    exit 1
fi
3 changes: 3 additions & 0 deletions atlas-experiment-metadata-test.bats
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,6 @@ setup() {
[ -f "$test_unmelted_sdrf" ]
}

# Smoke test: invokes get_experiment_type_from_xml.pl through Bats' `run`
# helper without passing an XML file name.
# NOTE(review): nothing is asserted on $status or $output afterwards, so this
# test looks like it passes whatever the script does - TODO add an XML fixture
# and assertions.
@test "Test get experiment type from XML" {
run perl get_experiment_type_from_xml.pl
}
14 changes: 14 additions & 0 deletions get_experiment_type_from_xml.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env perl
#

use strict;
use warnings;
use 5.10.0;

use Atlas::AtlasConfig::Reader qw( parseAtlasConfig );

# Path to the experiment's XML configuration file, taken from the first
# command-line argument.
my $xmlFilename = shift;

# Fail early with a usage message instead of handing an undefined or empty
# file name to the parser.
unless ( defined $xmlFilename && length $xmlFilename ) {
    die "Usage: $0 <experiment-configuration.xml>\n";
}

# Parse the configuration and ask the resulting object for its experiment type.
my $experimentConfig = parseAtlasConfig( $xmlFilename );
my $experimentType = $experimentConfig->get_atlas_experiment_type;

# The type is written to standard output without a trailing newline.
print $experimentType;
62 changes: 62 additions & 0 deletions get_magetab_for_experiment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env bash


# Write the expected command-line form of this script to standard error.
usage() {
    printf 'Usage: %s <accession> path-to-atlas-exps\n' "$0" >&2
}

# Positional arguments: the experiment accession and the directory that holds
# one sub-directory per experiment.
expAcc=$1
ATLAS_EXPS=$2

if [ -z "${expAcc}" ] || [ -z "${ATLAS_EXPS}" ]; then
    usage
    exit 1
fi

# Absolute path of the directory holding this script, resolved before the
# working directory changes.
scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

# Every path used below is relative ("$expAcc/...") and the script finishes
# with `popd`, so the experiments directory has to be entered here.
# NOTE(review): this assumes the relative paths are meant to resolve against
# $ATLAS_EXPS, which was otherwise required but never used - confirm.
if ! pushd "$ATLAS_EXPS" > /dev/null; then
    echo "ERROR: cannot change to Atlas experiments directory $ATLAS_EXPS" >&2
    exit 1
fi

# Get the experiment type from the XML config. The helper prints the type on
# standard output; a non-zero exit status from it aborts this script.
if ! expType=$("${scriptDir}/get_experiment_type_from_xml.pl" "$expAcc/$expAcc-configuration.xml"); then
    echo "ERROR: failed to get $expAcc experiment type from XML config. Cannot generate condensed SDRF." >&2
    exit 1
fi

# Now generate condensed sdrf containing ontology mappings from Zooma. This
# will also copy IDF from ArrayExpress load directory (using "-i" option).
# If this is a baseline experiment, pass the factors XML filename as well to ensure factors match in condensed SDRF.
condenseArgs=(-e "$expAcc")
if [[ $expType == *baseline ]]; then
    condenseArgs+=(-f "$expAcc/$expAcc-factors.xml")
fi

# First attempt uses Zooma mappings (-z); if that fails, retry once without
# them. `exit` is used rather than `return` because this is top-level code in
# an executed script, where `return` is an error and does not stop the script.
if ! "${scriptDir}/condense_sdrf.pl" "${condenseArgs[@]}" -z -i -o "$expAcc"; then
    echo "ERROR: Failed to generate $expAcc/${expAcc}.condensed-sdrf.tsv with Zooma mappings, trying without..." >&2
    if ! "${scriptDir}/condense_sdrf.pl" "${condenseArgs[@]}" -i -o "$expAcc"; then
        echo "ERROR: Failed to generate $expAcc/${expAcc}.condensed-sdrf.tsv" >&2
        exit 1
    fi
fi

# A zero exit status is not enough: the condensed SDRF must exist and be non-empty.
if [ ! -s "$expAcc/${expAcc}.condensed-sdrf.tsv" ]; then
    echo "ERROR: Failed to generate $expAcc/${expAcc}.condensed-sdrf.tsv" >&2
    exit 1
fi

# Apply the automatic property/value fixes to the experiment's files.
# NOTE(review): assumes applyAllFixesForExperiment.sh lives next to this script
# (the bare name `applyAllFixesForExperiment` is not defined in this file) -
# confirm. It is run through bash so it does not need the executable bit.
if ! bash "${scriptDir}/applyAllFixesForExperiment.sh" "$expAcc"; then
    echo "ERROR: Applying fixes for experiment $expAcc failed" >&2
    exit 1
fi

# Remove the Zooma log; ${expAcc:?} aborts rather than deleting from "/" should
# the accession ever be empty.
rm -rf -- "${expAcc:?}/${expAcc}-zoomifications-log.tsv"

# Restore the previous working directory only if one was pushed earlier, so an
# empty directory stack cannot turn a successful run into a failing exit status.
if (( ${#DIRSTACK[@]} > 1 )); then
    popd > /dev/null
fi