-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
178 additions
and
258 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,10 @@ | ||
log | ||
*rda | ||
*.json | ||
tree.json | ||
__pycache__/ | ||
nexus.py | ||
pool-cpp.out | ||
pool.R | ||
pool.cpp | ||
pool.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,9 @@ | ||
all: | ||
morloc typecheck -r main.loc | ||
morloc make main.loc | ||
./nexus.py -h | ||
|
||
.PHONY: | ||
search: | ||
./nexus.py searchEntrez config.json > searchResult.json | ||
all: nexus.py | ||
./nexus.py makeTree data/config.json > tree.json | ||
./nexus.py plot data/config.json tree.json | ||
|
||
.PHONY: | ||
run: | ||
./nexus.py classifyByDate config.json > tree.json | ||
./nexus.py plotCladeTree config.json tree.json | ||
|
||
.PHONY: | ||
makec: | ||
g++ --std=c++17 -o pool-cpp.out pool.cpp -I/home/z/.morloc/include -I/home/z/.morloc/src/morloc/plane/morloclib/bio/tree -I/home/z/.morloc/src/morloc/plane/morloclib/cppbase -I/home/z/.morloc/src/morloc/plane/morloclib/bio/algo -I/home/z/.morloc/src/morloc/plane/morloclib/matrix/eigen | ||
nexus.py: | ||
morloc make main.loc | ||
|
||
.PHONY: clean | ||
clean: | ||
rm -rf pool* nexus* *pdf *rda __pycache__ | ||
rm -rf pool* nexus* *pdf *rda __pycache__ tree.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
{ "mindate" : "2021/01/01" | ||
, "maxdate" : "2021/01/14" | ||
, "reffile" : "refs.txt" | ||
, "reffile" : "data/refs.txt" | ||
, "treefile" : "tree.pdf" | ||
, "email" : "zbwrnz@gmail.com" | ||
, "email" : "wena@mailinator.com" | ||
, "query" : "Influenza+A+Virus[Organism]+H3N2[ALL]+HA[ALL]" | ||
} |
File renamed without changes.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
module lib.classify (classify) | ||
|
||
import lib.flutypes (Clade) | ||
|
||
-- Import generic functions | ||
import base | ||
( id | ||
, snd | ||
, ifelse | ||
, eq | ||
, size | ||
, fst | ||
, branch | ||
, head | ||
, const | ||
, unique | ||
, ne | ||
, filter | ||
) | ||
|
||
-- Import the tree type and generic functions for operating on it | ||
import bio.tree | ||
( RootedTree | ||
, pullNode | ||
, push | ||
) | ||
|
||
-- Traverse the phylogenetic tree setting clade labels | ||
-- `classify` is a composition of two tree passes; because `.` composes right
-- to left, `pullNode snd pullClade` is applied first and
-- `push id passClade setLeaf` is applied to its result.
--
-- Helper behavior, as written below:
--   * pullClade xs: removes entries of size 0 from xs, deduplicates the rest,
--     and hands the result to `branch`: presumably yielding the single
--     remaining value when exactly one distinct value is left and ""
--     otherwise (TODO confirm `branch` argument order in `base`).
--   * passClade parent edge child: keeps the edge unchanged and pairs it with
--     either `parent` or `child`, chosen by `ifelse` on whether `child` has
--     size 0 (presumably `parent` when the child is empty; confirm `ifelse`).
--   * setLeaf parent edge leaf: keeps the edge and the first component of the
--     leaf, and replaces the leaf's second component with `parent`.
--
-- NOTE(review): the traversal direction and callback conventions of
-- `pullNode` and `push` are defined in bio.tree and are not visible here;
-- verify against that module before relying on the description above.
classify n e a :: RootedTree n e (a, Clade) -> RootedTree Str e (a, Clade) | ||
classify | ||
= push id passClade setLeaf | ||
. pullNode snd pullClade | ||
where | ||
passClade parent edge child = (edge, ifelse (eq 0 (size child)) parent child) | ||
setLeaf parent edge leaf = (edge, (fst leaf, parent)) | ||
pullClade xs | ||
= branch (eq 1 . size) head (const "") seenClades | ||
where | ||
seenClades = ( unique | ||
. filter (ne 0 . size) | ||
) xs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
module lib.flutypes (*) | ||
|
||
import types(Str) | ||
|
||
-- Type aliases | ||
type Accession = Str | ||
type Clade = Str | ||
type Sequence = Str | ||
type Date = Str |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# parseRecord :: JsonObj -> (JsonObj, Sequence)
def parseRecord(jsonObj):
    """Split a record into its metadata and its upper-cased sequence.

    Parameters:
        jsonObj: mapping holding a "GBSeq_sequence" string entry.

    Returns:
        A tuple (metadata, sequence): metadata is a new dict with every
        entry of jsonObj except "GBSeq_sequence"; sequence is that entry
        converted to upper case.

    Raises:
        KeyError: if jsonObj has no "GBSeq_sequence" entry.
    """
    sequence = jsonObj["GBSeq_sequence"].upper()
    # Build a copy rather than deleting the key in place, so the caller's
    # record is left untouched.
    metadata = {key: value for key, value in jsonObj.items() if key != "GBSeq_sequence"}
    return (metadata, sequence)
|
||
# labelRef :: Map Accession Clade -> JsonObj -> (JsonObj, Clade)
def labelRef(cladeMap, jsonObj):
    """Pair a record with the clade registered for its primary accession.

    The accession is read from jsonObj["GBSeq_primary-accession"]. When it is
    a key of cladeMap the mapped clade is used; otherwise the label is the
    empty string. The record itself is returned unchanged as the first
    element of the tuple.
    """
    key = jsonObj["GBSeq_primary-accession"]
    label = cladeMap[key] if key in cladeMap else ""
    return (jsonObj, label)
|
||
# setLeafName :: (JsonObj, Clade) -> Str
def setLeafName(meta):
    """Build a leaf label of the form "<clade>|<accession>|<length>".

    Parameters:
        meta: a (jsonObj, clade) pair; jsonObj must provide the
            "GBSeq_primary-accession" and "GBSeq_length" entries.

    Returns:
        The three fields joined with "|".

    Raises:
        KeyError: if either required entry is missing from jsonObj.
    """
    (jsonObj, clade) = meta
    # The length is coerced with str() so a numeric value does not raise a
    # TypeError during concatenation; string values pass through unchanged.
    return ( clade + "|" +
             jsonObj["GBSeq_primary-accession"] + "|" +
             str(jsonObj["GBSeq_length"])
           )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
module lib.retrieve (retrieve, setLeafName, FluConfig) | ||
|
||
import lib.flutypes | ||
( Accession | ||
, Clade | ||
, Date | ||
, Sequence | ||
) | ||
|
||
-- Import functions for accessing public sequence records | ||
import bio.db | ||
( searchEntrez as fetchIds | ||
, nucleotideAccessionToJson as fetchRecords | ||
, EntrezSearchConfig | ||
, EntrezFetchConfig | ||
) | ||
|
||
-- Import generic functions | ||
import base (map, concat, shard, join, keys, onFst, sleep, readMap) | ||
import json (JsonObj) | ||
import types (Filename) | ||
|
||
|
||
-- Define the configuration record | ||
record FluConfig = FluConfig | ||
{ mindate :: Date | ||
, maxdate :: Date | ||
, reffile :: Filename | ||
, treefile :: Filename | ||
, query :: Str | ||
, email :: Str | ||
} | ||
|
||
-- Specify the representation of this record in Python and R | ||
record Py => FluConfig = "dict" | ||
record R => FluConfig = "list" | ||
|
||
-- Source python functions for dealing with Entrez records | ||
source Py from "entrez.py" | ||
( "parseRecord" | ||
, "labelRef" | ||
, "setLeafName" | ||
) | ||
|
||
-- Source C++ alternative for one of these functions | ||
source Cpp from "entrez.hpp" ("setLeafName") | ||
|
||
-- Define the general type of each function | ||
parseRecord :: JsonObj -> (JsonObj, Sequence)
labelRef :: Map Accession Clade -> JsonObj -> (JsonObj, Clade)
-- The sourced Python implementation unpacks its argument as (jsonObj, clade),
-- so the second component is the clade label, not the sequence.
setLeafName :: (JsonObj, Clade) -> Str
|
||
-- Retrieve sequence data from Entrez and tag reference strains | ||
-- `retrieve` feeds config@query through a pipeline composed with `.`, so the
-- stages listed in the body run from the bottom up:
--   1. fetchIds searchConfig: bio.db `searchEntrez`, configured with the
--      email and date range from the config, db "nuccore" and retmax 1000.
--   2. join (keys refmap): combines the result with the keys of the map
--      read from config@reffile (presumably appending the reference
--      accessions to the search hits; confirm `join` in `base`).
--   3. shard 30: presumably splits the list into groups of 30; confirm.
--   4. per group: fetchRecords fetchConfig (bio.db
--      `nucleotideAccessionToJson`), then `sleep 1.0`, then parseRecord on
--      each fetched record, giving (JsonObj, Sequence) pairs.
--      NOTE(review): the sleep looks like request throttling; confirm.
--   5. concat: flattens the per-group results into one list.
--   6. map (onFst (labelRef refmap)): applies labelRef to the first element
--      of each pair, giving ((JsonObj, Clade), Sequence).
retrieve :: FluConfig -> [((JsonObj, Clade), Sequence)] | ||
retrieve config = | ||
( map (onFst (labelRef refmap)) | ||
. concat | ||
. map ( map parseRecord | ||
. sleep 1.0 | ||
. fetchRecords fetchConfig | ||
) | ||
. shard 30 | ||
. join (keys refmap) | ||
. fetchIds searchConfig | ||
) config@query | ||
where | ||
searchConfig = | ||
{ email = config@email | ||
, db = "nuccore" | ||
, mindate = config@mindate | ||
, maxdate = config@maxdate | ||
, retmax = 1000 | ||
} | ||
fetchConfig = { email = config@email } | ||
refmap = readMap config@reffile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
module lib.treeplot (plotTree) | ||
|
||
import bio.tree(RootedTree) | ||
import types (Filename) | ||
|
||
source R from "plot-tree.R" ("plotTree") | ||
plotTree :: Filename -> RootedTree Str Real Str -> () |
File renamed without changes.
Oops, something went wrong.