# BMI/BMI.cfg

# The configuration file consists of sections, led by a [section] header and followed by name: value entries (name=value is also accepted)
# Lines beginning with '#' or ';' are ignored and may be used to provide comments.

[PATHS]

# Absolute path to the directory where the output files will be written
analysis_path: /home/jhz22/DEPICT


[GWAS FILE SETTINGS]

# Absolute path to GWAS summary statistics (can be in plain text or gzip format)
gwas_summary_statistics_file: /home/jhz22/DEPICT/BMI.txt.gz

# Association cutoff used by DEPICT to define associated loci
association_pvalue_cutoff:  5e-8

# Label for output files generated by DEPICT
label_for_output_files: BMI

# Name of p value column
pvalue_col_name: P

# Name of marker name column. Format: <chr:pos>, e.g. '6:2321'. If this column does not exist, leave it empty; chr_col_name and pos_col_name will be used instead.
marker_col_name:

# Name of chromosome column. Leave empty if this column does not exist.
chr_col_name: Chr

# Name of position column. Leave empty if this column does not exist.
pos_col_name: Pos

# Separator options: 'tab', 'comma', 'semicolon' or 'space'
separator: tab


[PLINK SETTINGS]

# Change to your PLINK v. 1.9 August 5 release (or higher)
plink_executable: /home/jhz22/bin/plink-1.9

# Point this to the PLINK formatted genotype data (change only the path to DEPICT if you use the 1000 Genomes data provided with DEPICT)
# genotype_data_plink_prefix: data/genotype_data_plink/CEU_GBR_TSI_unrelated.phase1_release_v3.20101123.snps_indels_svs.genotypes_ldl_teslovich_nature2010
genotype_data_plink_prefix: data/genotype_data_plink/CEU_GBR_TSI_unrelated.phase1_release_v3.20101123.snps_indels_svs.genotypes_noduplicates


[DEPICT SETTINGS]

# The following step is needed to construct DEPICT loci based on your GWAS summary statistics
step_construct_depict_loci: yes

# The following step is needed to perform DEPICT gene prioritization
step_depict_geneprio: yes

# The following step is needed to perform DEPICT gene set enrichment
step_depict_gsea: yes

# The following step is needed to perform DEPICT tissue enrichment analysis
step_depict_tissueenrichment: yes


[MISC SETTINGS]

# Number of threads used by the DEPICT Java binary
number_of_threads: 12

# Java heap size in megabytes
heap_size_in_mb: 16000

# Precomputed loci for each 1000 Genomes Project SNP
collection_file: data/collections/ld0.5_collection_1000genomespilot_depict_150429.txt.gz

# The reconstituted gene set files used by DEPICT
reconstituted_genesets_file: data/reconstituted_genesets/reconstituted_genesets_150901.binary

# The tissue expression matrix
tissue_expression_file: data/tissue_expression/GPL570EnsemblGeneExpressionPerTissue_DEPICT20130820_z_withmeshheader.txt

# Mapping from tissue/cell type identifier to tissue name and information
tissue_mapping_file: data/tissue_expression/GPL570EnsemblGeneExpressionPerTissue_DEPICT20130820_z_withmeshheader_mapping.txt

# Gene annotation file
depict_gene_annotation_file: data/mapping_and_annotation_files/GPL570ProbeENSGInfo+HGNC_reformatted.txt

# Number of genes to report for each reconstituted gene set
max_top_genes_for_gene_set: 10

# Number of repetitions used to compute false discovery rates
nr_repititions: 50

# Number of permutations used to adjust for biases such as gene length
nr_permutations: 500

# Boundaries of HLA region (Should not be changed)
mhc_start_bp: 25000000
mhc_end_bp: 35000000

# Directory with precomputed background files
background_data_path: data/backgrounds

# Mapping from gene set identifiers to gene set names
go_mapping_file: data/mapping_and_annotation_files/GO.terms_alt_ids_withoutheader.tab
mgi_mapping_file: data/mapping_and_annotation_files/VOC_MammalianPhenotype.rpt
inweb_mapping_file: data/mapping_and_annotation_files/inweb_mapping.tab

# Files used to output eQTL column in gene prioritization result file
eqtl_mapping_file: data/mapping_and_annotation_files/2012-08-08-IlluminaAll96PercentIdentity-ProbeAnnotation-ProbesWithWrongMappingLengthFilteredOut-EnsemblAnnotation.txt
eqtl_file: data/mapping_and_annotation_files/eQTLProbesFDR0.05.txt

# Prioritize genes across entire genome
prioritize_genes_outside_input_loci: no

# Chromosome to be left out; leave empty if all chromosomes should be included (for benchmarking purposes)
leave_out_chr:

# Number of null GWAS used to run permutations (bias adjustment) and repetitions (FDR calculations)
number_random_runs: 500

# Threshold used when clumping null GWAS
background_plink_clumping_pvalue: 0.5