Seeding comprehensive analyses in their named directories (e.g., BMI), the repository links to technical issues documented in physalia, Mixed-Models, software-notes and other sister repositories (SUMSTATS, FM-pipeline, PW-pipeline, hess-pipeline, TWAS-pipeline, EWAS-fusion) for fine-mapping, pathway analysis, TWAS, Mendelian randomisation, predictive analytics and other topics as highlighted in the wiki page.
Earlier or broader aspects have been reflected in the following repositories: Haplotype-Analysis, misc, R.
The figure below was generated with eQTL.R.
The Ensembl public MySQL Servers
The following script gives information on genes from ENSEMBL as well as the attributes (columns) and filters whose names contain "gene".
library(biomaRt)
# Show the available BioMart databases, then the datasets held by two of them.
listMarts()
mart <- useMart("ENSEMBL_MART_FUNCGEN")
listDatasets(mart)
mart <- useMart("ensembl")
listDatasets(mart)

# Connect to the human gene dataset served from grch37.ensembl.org.
ensembl <- useMart(
  "ensembl",
  dataset = "hsapiens_gene_ensembl",
  host = "grch37.ensembl.org",
  path = "/biomart/martservice"
)

# Catalogue every attribute (column), then download a chosen subset of them
# for all genes in the dataset.
attr <- listAttributes(ensembl)
attr_select <- c(
  "ensembl_gene_id",
  "chromosome_name",
  "start_position",
  "end_position",
  "description",
  "hgnc_symbol",
  "transcription_start_site"
)
gene <- getBM(attributes = attr_select, mart = ensembl)

# Catalogue the filters and look up those whose entries match "gene".
filter <- listFilters(ensembl)
searchFilters(mart = ensembl, pattern = "gene")
See also https://sites.google.com/site/jpopgen/wgsa for precompiled annotation. Alternatively,
# GENCODE v19
# Import the release 19 annotation GTF directly from the EBI FTP site and
# convert the imported object into a plain data frame.
url <- "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.chr_patch_hapl_scaff.annotation.gtf.gz"
gtf <- rtracklayer::import(con = url)
gencode <- as.data.frame(gtf)
LDlink: https://ldlink.nci.nih.gov/?tab=home.
NyuWa Chinese Population Variant Database (NCVD): http://bigdata.ibp.ac.cn/NyuWa_variants/
Example code,
library(ontologyIndex)
#' Select inflammation- and immunity-related terms from an ontology
#'
#' Term names are matched (case-sensitively) against the patterns
#' "inflammatory" and "immune". Matches for "inflammatory" come first,
#' followed by any additional matches for "immune"; each term appears once.
#'
#' @param ontology An object with parallel `id` and `name` elements that can
#'   be reached with `$`.
#' @return A list with elements `id` and `name` holding the matching entries.
id <- function(ontology) {
  term_names <- ontology$name
  hits <- unique(c(
    grep("inflammatory", term_names),
    grep("immune", term_names)
  ))
  list(id = ontology$id[hits], name = term_names[hits])
}
# GO
# Load the `go` dataset and keep its inflammatory/immune terms.
data(go)
goidname <- id(go)

# EFO
# Read the ontology from a local OBO file, keeping every tag.
file <- "efo.obo"
get_relation_names(file)
efo <- get_ontology(file, extract_tags = "everything")
length(efo) # 89
length(efo$id) # 27962
efoidname <- id(efo)

# Collect all descendants of two EFO terms; for EFO:0000540 also pair the
# identifiers with their names in a data frame.
diseases <- get_descendants(efo, "EFO:0000408")
efo_0000540 <- get_descendants(efo, "EFO:0000540")
efo_0000540name <- efo$name[efo_0000540]
isd <- data.frame(efo_0000540, efo_0000540name)

# save() fails when the target directory is missing, so create it first
# (a no-op when it is already there).
dir.create("work", showWarnings = FALSE)
save(efo, diseases, isd, efoidname, goidname, file = "work/efo.rda")
write.table(
  isd,
  file = "efo_0000540.csv",
  col.names = FALSE,
  row.names = FALSE,
  sep = ","
)

# Attach the plotting package before opening the device so that a failed
# load cannot leave an open PDF device and an empty file behind.
library(ontologyPlot)
pdf("efo_0000540.pdf", height = 15, width = 15)
onto_plot(efo, efo_0000540)
dev.off()
https://fuma.ctglab.nl/ (https://github.com/Kyoko-wtnb/FUMA-webapp/)
https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/
https://metamap.nlm.nih.gov/MetaMapLite.shtml
- https://hupo.org/human-proteome-project (Resources, https://hupo.org/HPP-Resources)
- https://www.topdownproteomics.org/
http://www.roadmapepigenomics.org/
https://snakemake.github.io/snakemake-workflow-catalog/
https://github.com/hakyimlab/MetaXcan
http://gusevlab.org/projects/fusion/
The relevant URLs are as follows,
- https://cran.r-project.org/web/packages/rentrez/vignettes/rentrez_tutorial.html
- https://pubmed.ncbi.nlm.nih.gov/
- https://www.ncbi.nlm.nih.gov/pmc/pmctopmid/
with example code,
library(rentrez)
# List the Entrez databases, the databases PubMed links to, and the
# searchable PubMed fields.
entrez_dbs()
entrez_db_links("pubmed")
pubmed_fields <- entrez_db_searchable("pubmed")
# set_entrez_key("")
Sys.getenv("ENTREZ_KEY")

# Run the search with use_history = TRUE so the hits can be retrieved in
# batches via the returned web_history object.
term <- "pQTLs OR (protein AND quantitative AND trait AND loci) AND human [MH] AND (plasma OR Serum)"
r <- entrez_search(db = "pubmed", term = term, use_history = TRUE)
class(r)
names(r)
with(r, web_history)

# The loop below appends to these files, so remove leftovers from earlier runs.
unlink(paste("pubmed", c("fetch", "summary"), sep = "."))
fields <- c("uid", "pubdate", "sortfirstauthor", "title", "source", "volume", "pages")

# retstart is a zero-based offset in the NCBI E-utilities, so the first batch
# must start at 0; starting at 1 would skip the first hit. The guard keeps
# seq() from raising an error when the search returns nothing.
n_hits <- with(r, count)
batch_size <- 50
batch_starts <- if (n_hits > 0) seq(0, n_hits - 1, by = batch_size) else integer(0)
for (i in batch_starts) {
  f <- entrez_fetch(
    db = "pubmed",
    web_history = with(r, web_history),
    rettype = "text",
    retmax = batch_size,
    retstart = i
  )
  write.table(f, col.names = FALSE, row.names = FALSE, file = "pubmed.fetch", append = TRUE)
  s <- entrez_summary(
    db = "pubmed",
    web_history = with(r, web_history),
    rettype = "text",
    retmax = batch_size,
    retstart = i
  )
  e <- extract_from_esummary(s, fields)
  # One row per record, one tab-separated column per requested field.
  write.table(t(e), col.names = FALSE, row.names = FALSE, file = "pubmed.summary", append = TRUE, sep = "\t")
  # Report progress after the batch is written, capped at the real total.
  cat(min(i + batch_size, n_hits), "records downloaded\r")
}
# Post one OMIM identifier to the history server, then follow links from
# OMIM to ClinVar and from ClinVar to dbSNP using the stored histories.
id <- 600807
upload <- entrez_post(db = "omim", id = id)
asthma_variants <- entrez_link(
  dbfrom = "omim",
  db = "clinvar",
  cmd = "neighbor_history",
  web_history = upload
)
asthma_variants
snp_links <- entrez_link(
  dbfrom = "clinvar",
  db = "snp",
  cmd = "neighbor_history",
  web_history = asthma_variants$web_histories$omim_clinvar
)
# NOTE(review): the same numeric id is reused here as a PubMed id — confirm
# that this is intended.
all_links <- entrez_link(dbfrom = "pubmed", id = id, db = "all")
http://www.sequenceontology.org/
- China Kadoorie Biobank
- Estonian Biobank
- FinnGen
- Japan Biobank
- UK Biobank
- AMS (mailto:[email protected]), Access_019-Access-Management-System-User-Guide-V4.0.pdf, messages.
- Accessing data guide, http://biobank.ctsu.ox.ac.uk/crystal/exinfo.cgi?src=AccessingData.
- AstraZeneca PheWAS Portal, https://azphewas.com/ (CGR Proteogenomics Portal)
- Data access guide 3.2, https://biobank.ndph.ox.ac.uk/~bbdatan/Data_Access_Guide_v3.2.pdf
- DNAnexus, landing, partnerships
- Imputation, http://biobank.ctsu.ox.ac.uk/crystal/crystal/docs/impute_ukb_v1.pdf.
- Gene ATLAS, http://geneatlas.roslin.ed.ac.uk/.
- genebass
- PHESANT.
- Online showcase, https://biobank.ndph.ox.ac.uk/ukb/ (Showcase User Guide).
- Pan-UK Biobank, GWAS sumstats and GitHub.
- COVID-19 data, format and field.