-
Notifications
You must be signed in to change notification settings - Fork 5
/
seurat-scale-data.R
executable file
·162 lines (146 loc) · 5.34 KB
/
seurat-scale-data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env Rscript
# Load optparse we need to check inputs
suppressPackageStartupMessages(require(optparse))
# Load common functions
suppressPackageStartupMessages(require(workflowscriptscommon))
# parse options
option_list = list(
make_option(
c("-i", "--input-object-file"),
action = "store",
default = NA,
type = 'character',
help = "File name in which a serialized R matrix object may be found."
),
make_option(
c("--input-format"),
action = "store",
default = "seurat",
type = 'character',
help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
),
make_option(
c("--output-format"),
action = "store",
default = "seurat",
type = 'character',
help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
),
make_option(
c("-e", "--genes-use"),
action = "store",
default = NULL,
type = 'character',
help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
),
make_option(
c("-v", "--vars-to-regress"),
action = "store",
default = NULL,
type = 'character',
help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
),
make_option(
c("-m", "--model-use"),
action = "store",
default = 'linear',
type = 'character',
help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
),
make_option(
c("-u", "--use-umi"),
action = "store",
default = FALSE,
type = 'logical',
help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
),
make_option(
c("-s", "--do-not-scale"),
action = "store_true",
default = FALSE,
type = 'logical',
help = "Skip the data scale."
),
make_option(
c("-c", "--do-not-center"),
action = "store_true",
default = FALSE,
type = 'logical',
help = "Skip data centering."
),
make_option(
c("-x", "--scale-max"),
action = "store",
default = 10,
type = 'double',
help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
),
make_option(
c("-b", "--block-size"),
action = "store",
default = 1000,
type = 'integer',
help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
),
make_option(
c("-d", "--min-cells-to-block"),
action = "store",
default = 1000,
type = 'integer',
help = "If object contains fewer than this number of cells, don't block for scaling calculations."
),
make_option(
c("-n", "--check-for-norm"),
action = "store",
default = TRUE,
type = 'logical',
help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
),
make_option(
c("-o", "--output-object-file"),
action = "store",
default = NA,
type = 'character',
help = "File name in which to store serialized R object of type 'Seurat'.'"
)
)
opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file'))
# Check parameter values
if ( ! file.exists(opt$input_object_file)){
stop((paste('File', opt$input_object_file, 'does not exist')))
}
if (! is.null(opt$genes_use)){
if (! file.exists(opt$genes_use)){
stop((paste('Supplied genes file', opt$genes_use, 'does not exist')))
}else{
genes_use <- readLines(opt$genes_use)
}
}else{
genes_use <- NULL
}
# Now we're hapy with the arguments, load Seurat and do the work
suppressPackageStartupMessages(require(Seurat))
if(opt$input_format == "loom" | opt$output_format == "loom") {
suppressPackageStartupMessages(require(SeuratDisk))
} else if(opt$input_format == "singlecellexperiment" | opt$output_format == "singlecellexperiment") {
suppressPackageStartupMessages(require(scater))
}
# Input from serialized R object
seurat_object <- read_seurat4_object(input_path = opt$input_object_file, format = opt$input_format)
# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
# might be useful
scaled_seurat_object <- ScaleData(seurat_object,
features = genes_use,
vars.to.regress = opt$vars_to_regress,
model.use = opt$model_use,
use.umi = opt$use_umi,
do.scale = !opt$do_not_scale,
do.center = !opt$do_not_center,
scale.max = opt$scale_max,
block.size = opt$block_size,
min.cells.to.block = opt$min_cells_to_block,
verbose = FALSE)
# Output to a serialized R object
write_seurat4_object(seurat_object = scaled_seurat_object,
output_path = opt$output_object_file,
format = opt$output_format)