-
Notifications
You must be signed in to change notification settings - Fork 5
/
seurat-find-clusters.R
executable file
·186 lines (163 loc) · 6.12 KB
/
seurat-find-clusters.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env Rscript
# Load optparse we need to check inputs
suppressPackageStartupMessages(require(optparse))
# Load common functions
suppressPackageStartupMessages(require(workflowscriptscommon))
# Command-line options.
# The parsed values are read further down with the dashes of each long flag
# replaced by underscores (e.g. --input-object-file -> opt$input_object_file).
option_list <- list(
  # Input / output locations and formats
  make_option(
    c("-i", "--input-object-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which a serialized R matrix object may be found."
  ),
  make_option(
    c("--input-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
  ),
  make_option(
    c("--output-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
  ),
  # Clustering parameters, forwarded to FindClusters()
  make_option(
    c("-r", "--resolution"),
    action = "store",
    default = 0.8,
    type = "double",
    help = "Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities."
  ),
  make_option(
    c("-a", "--algorithm"),
    action = "store",
    default = 1,
    type = "integer",
    help = "Algorithm for modularity optimization (1 = original Louvain algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM algorithm; 4 Leiden)."
  ),
  make_option(
    c("-m", "--tmp-file-location"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Directory where intermediate files will be written. Specify the ABSOLUTE path."
  ),
  make_option(
    c("--modularity-fxn"),
    action = "store",
    default = 1,
    type = "integer",
    help = "Modularity function: 1 standard, 2 alternative."
  ),
  make_option(
    c("--method"),
    action = "store",
    default = "matrix",
    type = "character",
    help = "Method for leiden (defaults to matrix which is fast for small datasets). Enable method = \"igraph\" to avoid casting large data to a dense matrix."
  ),
  make_option(
    c("-o", "--output-object-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which to store serialized R object of type 'Seurat'."
  ),
  make_option(
    c("-t", "--output-text-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which to store text format set of clusters."
  ),
  make_option(
    c("--graph-name"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Name of graph to use for the clustering algorithm."
  ),
  make_option(
    c("-s", "--nrandom-starts"),
    action = "store",
    default = 10,
    type = "integer",
    help = "Number of random starts"
  ),
  make_option(
    c("--n-iterations"),
    action = "store",
    default = 10,
    type = "integer",
    help = "Maximal number of iterations per random start"
  ),
  # store_false with default TRUE: the stored value stays TRUE unless the flag
  # is given. It is handed unchanged to FindClusters(group.singletons = ...).
  make_option(
    c("--no-group-singletons"),
    action = "store_false",
    default = TRUE,
    help = "Do not group singletons into nearest cluster. If TRUE, assign all singletons to a \"singleton\" group"
  ),
  make_option(
    c("--random-seed"),
    action = "store",
    default = 0,
    type = "integer",
    help = "Seed of the random number generator"
  )
)
# Parse the command line. The input object, output object and output text
# file are passed to wsc_parse_args() as mandatory arguments.
opt <- wsc_parse_args(
  option_list,
  mandatory = c("input_object_file", "output_object_file", "output_text_file")
)

# Check parameter values: stop before loading Seurat if the input is missing.
if (!file.exists(opt$input_object_file)) {
  stop(paste("File", opt$input_object_file, "does not exist"))
}
# Now we're happy with the arguments, load Seurat and do the work
suppressPackageStartupMessages(require(Seurat))

# Format-specific helper packages. The two checks are independent so that a
# run mixing formats (e.g. loom input with singlecellexperiment output) gets
# both packages attached rather than only the first match.
io_formats <- c(opt$input_format, opt$output_format)
if ("loom" %in% io_formats) {
  suppressPackageStartupMessages(require(SeuratDisk))
}
if ("singlecellexperiment" %in% io_formats) {
  suppressPackageStartupMessages(require(scater))
}
# Input from serialized R object (read in the format given by --input-format)
seurat_object <- read_seurat4_object(
  input_path = opt$input_object_file,
  format = opt$input_format
)

# Run the clustering with the user-supplied parameters.
clustered_object <- FindClusters(
  seurat_object,
  algorithm = opt$algorithm,
  modularity.fxn = opt$modularity_fxn,
  method = opt$method,
  n.start = opt$nrandom_starts,
  n.iter = opt$n_iterations,
  random.seed = opt$random_seed,
  # --no-group-singletons is a store_false flag defaulting to TRUE, so this
  # value is TRUE unless the flag was passed on the command line.
  group.singletons = opt$no_group_singletons,
  verbose = TRUE,
  resolution = opt$resolution,
  graph.name = opt$graph_name,
  # The flag is --tmp-file-location, hence the key is tmp_file_location.
  temp.file.location = opt$tmp_file_location
)
# Report the clustering outcome on stdout.
# File locations and the help flag aren't interesting for reporting purposes,
# so they are left out of the parameter listing.
nonreport_params <- c(
  "input_object_file", "output_object_file", "help", "output_text_file",
  "tmp_file_location"
)
opt_table <- data.frame(value = unlist(opt), stringsAsFactors = FALSE)
is_reported <- !(rownames(opt_table) %in% nonreport_params)
opt_table <- opt_table[is_reported, , drop = FALSE]

# One row per cluster identity, holding the number of cells assigned to it.
cell_idents <- Idents(clustered_object)
cluster_table <- as.data.frame(table(cell_idents))
colnames(cluster_table) <- c("Cluster", "No. cells")
rownames(cluster_table) <- cluster_table$Cluster

# Headline, then the membership table, then the retained parameter values.
headline <- paste(
  ncol(GetAssayData(clustered_object)),
  "cells fall into ",
  length(levels(cell_idents)),
  "final clusters. Membership numbers:\n"
)
cat(
  headline,
  capture.output(cluster_table[, 2, drop = FALSE]),
  "\nParameter values:\n",
  capture.output(print(opt_table)),
  sep = "\n"
)
# Write the clustered object in the format requested by --output-format
write_seurat4_object(
  seurat_object = clustered_object,
  output_path = opt$output_object_file,
  format = opt$output_format
)

# Write the per-cell cluster assignments (cell name, cluster) as a CSV file
cell_clusters <- Idents(clustered_object)
cluster_assignments <- data.frame(
  cell = names(cell_clusters),
  cluster = cell_clusters
)
write.csv(
  cluster_assignments,
  file = opt$output_text_file,
  row.names = FALSE
)