-
Notifications
You must be signed in to change notification settings - Fork 1
/
readIllumina.R
86 lines (58 loc) · 2.57 KB
/
readIllumina.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#############################################
#
# Take a set of .CEL files, output affyBatch
#
#############################################
# Galaxy is sensitive to output messages, so make sure we control what's output
library(affy)
cl = commandArgs(trailingOnly=T)
sample_probe_profile = cl[1]
control_probe_profile = cl[2]
sample_info_file = cl[3]
illumina_annotation_main = cl[4]
illumina_annotation_control = cl[5]
experimentfile = cl[6]
outfile = cl[7]
res = ''
# First extract the tar file to get the CEL files
# Then read in the experiment
print (paste("Reading experiment file ", experimentfile))
experiment <- read.delim(file=experimentfile, sep="\t", row.names=1, stringsAsFactors=FALSE)
print ("Done reading experiment")
# Now read the expression files
library(lumi)
# Now read in, dending on the optional ones
if (sample_info_file != 0){
result <- lumiR.batch(sample_probe_profile, sampleInfoFile=sample_info_file, convertNuID=FALSE)
}else{
result <- lumiR.batch(sample_probe_profile, convertNuID=FALSE)
}
############
#
# Check IDs
#
############
# if these are not ILMN... ids, then the annotation packages don't work. Can be converted with the appropriate information extracted from the files from illumina at http://www.switchtoi.com/annotationfiles.ilmn
if (length(grep("ILMN", featureNames(result))) == 0){
if (illumina_annotation_main != 0){
illumina_anno <- read.delim(file=illumina_annotation_main, sep="\t", header=TRUE)
featureNames(result) <- illumina_anno$Probe_Id[match(featureNames(result), illumina_anno$Array_Address_Id)]
}else{
stop("This dataset has non-ILMN IDs, almost certainly 'Arrray Address Ids'. Go to http://www.switchtoi.com/annotationfiles.ilmn and download the annotation file from your chip, upload it with the upload tool, and parse it with 'Parse Illumina annoation...'")
}
}
#########
# Re-arrange the data to agree with the experiment. Do this BEFORE adding the controls, because it messes up the controlData slot
result <- result[,match(rownames(experiment), rownames(pData(result)))]
# Add in the controls
if (control_probe_profile != 0){
result <- addControlData2lumi(control_probe_profile, result)
}
# Rename to the 'Name' column in two places
if (is.null(experiment$Name)){
experiment$Name <- rownames(experiment)
}
pData(phenoData(result))$sampleID <- experiment$Name[match(pData(phenoData(result))$sampleID, rownames(experiment))]
sampleNames(result) <- experiment$Name[match(sampleNames(result), rownames(experiment))]
# Now we do the output
saveRDS(result, file = outfile)