Skip to content

Commit

Permalink
Add much more information about GTDB genomes
Browse files: browse the repository at this point in the history
  • Loading branch information
erikrikarddaniel committed Jul 14, 2020
1 parent 089a593 commit 68e8d7d
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 87 deletions.
2 changes: 1 addition & 1 deletion conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% set version = "1.9.7" %}
{% set version = "1.9.8" %}

package:
name: pfitmap-db
Expand Down
54 changes: 27 additions & 27 deletions src/R-test/pf-classify.gtdb.04.expect

Large diffs are not rendered by default.

54 changes: 27 additions & 27 deletions src/R-test/pf-classify.gtdb.05.expect

Large diffs are not rendered by default.

54 changes: 27 additions & 27 deletions src/R-test/pf-classify.gtdb.06.expect

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions src/R/pf-classify.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(feather))

SCRIPT_VERSION = "1.9.7"
SCRIPT_VERSION = "1.9.8"
ROWS_PER_SEQUENCE_TSV = 1e7

options(warn = 1)
Expand Down Expand Up @@ -144,7 +144,12 @@ if ( gtdb ) {
trank = 'species',
ncbi_taxon_id = ncbi_species_taxid
) %>%
select(accno0, accno1, tdomain:tspecies, trank, ncbi_taxon_id) %>%
select(
accno0, accno1, tdomain:tspecies, trank, ncbi_taxon_id, checkm_completeness, checkm_contamination, checkm_strain_heterogeneity,
contig_count, genome_size, gtdb_genome_representative, gtdb_representative, l50_contigs, l50_scaffolds, longest_contig,
longest_scaffold, mean_contig_length, mean_scaffold_length, n50_contigs, n50_scaffolds, ncbi_bioproject, ncbi_biosample,
ncbi_genbank_assembly_accession, protein_count, scaffold_count
) %>%
as.data.table()
} else {
logmsg(sprintf("Reading NCBI taxonomy from %s", opt$options$taxflat))
Expand Down
2 changes: 1 addition & 1 deletion src/R/pf-db2feather.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(stringr))

SCRIPT_VERSION = "1.9.7"
SCRIPT_VERSION = "1.9.8"

# Options for testing: opt <- list(options = list(gtdb = TRUE, verbose = TRUE, prefix='testing'), args = 'pf-classify.02.sqlite3')
# Get arguments
Expand Down
2 changes: 1 addition & 1 deletion src/R/pf-fasta-unique-taxon-protein.r
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))

SCRIPT_VERSION = "1.9.7"
SCRIPT_VERSION = "1.9.8"

# Testing arguments: opt <- list('options' = list('featherprefix' = 'pf-fasta-unique-taxon-protein.01', 'prank' = 'psubclass', 'trank' = 'tspecies'), args = c('pf-fasta-unique-taxon-protein.01.faa'))
# Get arguments
Expand Down
2 changes: 1 addition & 1 deletion src/R/pf-fetchseqs.r
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
suppressPackageStartupMessages(library(optparse))

# Arguments for testing: opt <- list(options = list(sqlitedb = 'pf-fetchseqs.07.original.sqlite3', fetch = TRUE, verbose = TRUE, sourcedbs = 'refseq,pdb', faalevel='pfamily', faadir='.'))
SCRIPT_VERSION = "1.9.7"
SCRIPT_VERSION = "1.9.8"

# Get arguments
option_list = list(
Expand Down

0 comments on commit 68e8d7d

Please sign in to comment.