Skip to content

Commit

Permalink
Modularize the flu case study
Browse files Browse the repository at this point in the history
  • Loading branch information
arendsee committed May 6, 2024
1 parent bbf2ad4 commit b7fc18d
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 258 deletions.
7 changes: 6 additions & 1 deletion demos/02_flu/.gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
log
*pdf
*rda
*.json
tree.json
__pycache__/
nexus.py
pool-cpp.out
pool.R
pool.cpp
pool.py
24 changes: 6 additions & 18 deletions demos/02_flu/Makefile
Original file line number Diff line number Diff line change
@@ -1,21 +1,9 @@
all:
morloc typecheck -r main.loc
morloc make main.loc
./nexus.py -h

.PHONY:
search:
./nexus.py searchEntrez config.json > searchResult.json
all: nexus.py
./nexus.py makeTree data/config.json > tree.json
./nexus.py plot data/config.json tree.json

.PHONY:
run:
./nexus.py classifyByDate config.json > tree.json
./nexus.py plotCladeTree config.json tree.json

.PHONY:
makec:
g++ --std=c++17 -o pool-cpp.out pool.cpp -I/home/z/.morloc/include -I/home/z/.morloc/src/morloc/plane/morloclib/bio/tree -I/home/z/.morloc/src/morloc/plane/morloclib/cppbase -I/home/z/.morloc/src/morloc/plane/morloclib/bio/algo -I/home/z/.morloc/src/morloc/plane/morloclib/matrix/eigen
nexus.py:
morloc make main.loc

.PHONY: clean
clean:
rm -rf pool* nexus* *pdf *rda __pycache__
rm -rf pool* nexus* *pdf *rda __pycache__ tree.json
4 changes: 2 additions & 2 deletions demos/02_flu/config.json → demos/02_flu/data/config.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{ "mindate" : "2021/01/01"
, "maxdate" : "2021/01/14"
, "reffile" : "refs.txt"
, "reffile" : "data/refs.txt"
, "treefile" : "tree.pdf"
, "email" : "zbwrnz@gmail.com"
, "email" : "wena@mailinator.com"
, "query" : "Influenza+A+Virus[Organism]+H3N2[ALL]+HA[ALL]"
}
File renamed without changes.
118 changes: 0 additions & 118 deletions demos/02_flu/entrez.py

This file was deleted.

41 changes: 41 additions & 0 deletions demos/02_flu/lib/classify/main.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
module lib.classify (classify)

import lib.flutypes (Clade)

-- Import generic functions
import base
( id
, snd
, ifelse
, eq
, size
, fst
, branch
, head
, const
, unique
, ne
, filter
)

-- Import the tree type and generic functions for operating on it
import bio.tree
( RootedTree
, pullNode
, push
)

-- Traverse the phylogenetic tree setting clade labels
--
-- The input tree carries (payload, Clade) pairs on its leaves; the output
-- tree has Str node labels and leaves whose clade has been filled in.
--
-- Two passes are composed, applied right to left:
--
--   1. `pullNode snd pullClade` -- presumably (per bio.tree; TODO confirm)
--      labels each node from the leaves below it, taking the clade (`snd`)
--      of each leaf and merging child labels with `pullClade`.
--      `pullClade` drops empty labels, de-duplicates the rest, and yields
--      the label only when exactly one distinct clade remains; otherwise it
--      yields "" (no clade or conflicting clades).
--
--   2. `push id passClade setLeaf` -- presumably (per bio.tree; TODO confirm)
--      walks from the root downwards. `passClade` keeps the edge unchanged
--      and gives a child node its parent's label when its own is empty
--      (size 0). `setLeaf` keeps the edge and the first element of the leaf
--      pair but replaces the leaf's clade with the parent's label.
--
-- NOTE(review): `setLeaf` discards the leaf's own clade, so a leaf whose
-- parent label is "" ends up with "" even if it was labelled on input --
-- confirm this is intended.
classify n e a :: RootedTree n e (a, Clade) -> RootedTree Str e (a, Clade)
classify
= push id passClade setLeaf
. pullNode snd pullClade
where
passClade parent edge child = (edge, ifelse (eq 0 (size child)) parent child)
setLeaf parent edge leaf = (edge, (fst leaf, parent))
pullClade xs
= branch (eq 1 . size) head (const "") seenClades
where
seenClades = ( unique
. filter (ne 0 . size)
) xs
9 changes: 9 additions & 0 deletions demos/02_flu/lib/flutypes/main.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module lib.flutypes (*)

import types(Str)

-- Type aliases
--
-- All four are plain aliases of Str: they document intent in signatures but
-- are interchangeable as far as the typechecker is concerned.
--
-- Accession: record identifier (the "GBSeq_primary-accession" field is used
--            as the lookup key in entrez.py)
-- Clade:     clade label; the empty string is used for "no clade assigned"
-- Sequence:  sequence text (entrez.py upper-cases "GBSeq_sequence" into it)
-- Date:      date string, e.g. "2021/01/01" in the demo config
type Accession = Str
type Clade = Str
type Sequence = Str
type Date = Str
File renamed without changes.
21 changes: 21 additions & 0 deletions demos/02_flu/lib/retrieve/entrez.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# parseRecord :: JsonObj -> (JsonObj, Sequence)
def parseRecord(jsonObj):
    """Split a record into its metadata and its upper-cased sequence.

    Parameters:
        jsonObj: mapping that contains a "GBSeq_sequence" string entry.

    Returns:
        A tuple ``(metadata, sequence)`` where ``metadata`` is a new dict
        holding every entry of ``jsonObj`` except "GBSeq_sequence", and
        ``sequence`` is the upper-cased value of that entry.

    Raises:
        KeyError: if "GBSeq_sequence" is missing.
    """
    sequence = jsonObj["GBSeq_sequence"].upper()
    # Build a shallow copy without the sequence instead of deleting the key
    # in place, so the caller's record is left untouched.
    metadata = {
        key: value
        for key, value in jsonObj.items()
        if key != "GBSeq_sequence"
    }
    return (metadata, sequence)

# labelRef :: Map Accession Clade -> JsonObj -> (JsonObj, Clade)
def labelRef(cladeMap, jsonObj):
    """Pair a record with the clade registered for its primary accession.

    Parameters:
        cladeMap: mapping from accession to clade label.
        jsonObj: record containing a "GBSeq_primary-accession" entry.

    Returns:
        ``(jsonObj, clade)``; ``clade`` is the empty string when the
        accession is not a key of ``cladeMap``.
    """
    clade = cladeMap.get(jsonObj["GBSeq_primary-accession"], "")
    return (jsonObj, clade)

# setLeafName :: (JsonObj, Clade) -> Str
def setLeafName(meta):
    """Build a leaf label of the form "<clade>|<accession>|<length>".

    Parameters:
        meta: a ``(jsonObj, clade)`` pair; ``jsonObj`` must contain
            "GBSeq_primary-accession" and "GBSeq_length".

    Returns:
        The three fields joined with "|".

    Raises:
        KeyError: if either record field is missing.
    """
    (jsonObj, clade) = meta
    accession = jsonObj["GBSeq_primary-accession"]
    # The length may arrive as a number rather than a string depending on how
    # the record was decoded; str() keeps the label construction from failing.
    length = str(jsonObj["GBSeq_length"])
    return "|".join((clade, accession, length))
75 changes: 75 additions & 0 deletions demos/02_flu/lib/retrieve/main.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
module lib.retrieve (retrieve, setLeafName, FluConfig)

import lib.flutypes
( Accession
, Clade
, Date
, Sequence
)

-- Import functions for accessing public sequence records
import bio.db
( searchEntrez as fetchIds
, nucleotideAccessionToJson as fetchRecords
, EntrezSearchConfig
, EntrezFetchConfig
)

-- Import generic functions
import base (map, concat, shard, join, keys, onFst, sleep, readMap)
import json (JsonObj)
import types (Filename)


-- Define the configuration record
--
-- How the fields are used by `retrieve` in this module:
--   mindate, maxdate: copied into the Entrez search configuration
--                     (the demo config uses strings like "2021/01/01")
--   reffile:          read with `readMap` to obtain the reference map whose
--                     keys are added to the fetched ids
--   treefile:         not used in this module -- presumably consumed by the
--                     plotting step; confirm against the caller
--   query:            the value the retrieval pipeline is applied to
--   email:            copied into both the search and the fetch configuration
record FluConfig = FluConfig
{ mindate :: Date
, maxdate :: Date
, reffile :: Filename
, treefile :: Filename
, query :: Str
, email :: Str
}

-- Specify the representation of this record in Python and R
-- (a Python "dict" and an R "list" respectively)
record Py => FluConfig = "dict"
record R => FluConfig = "list"

-- Source python functions for dealing with Entrez records
source Py from "entrez.py"
( "parseRecord"
, "labelRef"
, "setLeafName"
)

-- Source C++ alternative for one of these functions
source Cpp from "entrez.hpp" ("setLeafName")

-- Define the general type of each function
--
-- parseRecord: split a record into (metadata, sequence)
-- labelRef:    pair a record with its clade from the accession -> clade map
-- setLeafName: build a leaf name from a (record, clade) pair
parseRecord :: JsonObj -> (JsonObj, Sequence)
labelRef :: Map Accession Clade -> JsonObj -> (JsonObj, Clade)
-- The second tuple element is the clade label, matching the annotation and
-- implementation in entrez.py. Clade and Sequence are both aliases of Str, so
-- this only corrects the documented intent and does not change typechecking.
setLeafName :: (JsonObj, Clade) -> Str

-- Retrieve sequence data from Entrez and tag reference strains
--
-- The pipeline is a composition applied to `config@query`, so it reads
-- bottom to top:
--   1. `fetchIds searchConfig`  -- search with the query; searchConfig takes
--      email and the date range from the config, with db "nuccore" and
--      retmax 1000
--   2. `join (keys refmap)`     -- combine the keys of the reference map with
--      the search result (presumably list concatenation; confirm in base)
--   3. `shard 30`               -- presumably splits the ids into groups of
--      30; confirm in base
--   4. per group: `fetchRecords fetchConfig`, then `sleep 1.0` (presumably a
--      pause between requests; confirm in base), then `map parseRecord` to
--      split each record into (metadata, sequence)
--   5. `concat`                 -- flatten the per-group results
--   6. `map (onFst (labelRef refmap))` -- presumably applies `labelRef refmap`
--      to the first element of each pair, giving ((metadata, clade), sequence)
--
-- `refmap` is read from the file named by `config@reffile`.
retrieve :: FluConfig -> [((JsonObj, Clade), Sequence)]
retrieve config =
( map (onFst (labelRef refmap))
. concat
. map ( map parseRecord
. sleep 1.0
. fetchRecords fetchConfig
)
. shard 30
. join (keys refmap)
. fetchIds searchConfig
) config@query
where
searchConfig =
{ email = config@email
, db = "nuccore"
, mindate = config@mindate
, maxdate = config@maxdate
, retmax = 1000
}
fetchConfig = { email = config@email }
refmap = readMap config@reffile
7 changes: 7 additions & 0 deletions demos/02_flu/lib/treeplot/main.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
module lib.treeplot (plotTree)

import bio.tree(RootedTree)
import types (Filename)

-- Source the R implementation of plotTree from plot-tree.R.
-- It takes a filename and a rooted tree with Str node labels, Real edge
-- values and Str leaf labels, and returns unit -- presumably it is called
-- for the side effect of writing the plot to the given file; confirm
-- against plot-tree.R.
source R from "plot-tree.R" ("plotTree")
plotTree :: Filename -> RootedTree Str Real Str -> ()
File renamed without changes.
Loading

0 comments on commit b7fc18d

Please sign in to comment.