UGP Pipeline Config Template 0.0.4

From Utah Genome Project Wiki
Revision as of 16:54, 3 October 2013 by Admin (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

#############################################################
#############################################################
##################  cApTUrE config file #####################
#############################################################
#############################################################

## Pipeline uses Illumina file naming conventions.  This naming convention is described here:
## http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm
## The sample name and lane information of the fastq file are the most important parts for the pipeline to work correctly.
## Boolean options must be set to TRUE to enable them; leave the value blank for false.


[required]
# Project-wide settings: project identity, input/reference locations,
# CPU and Java memory limits, and the resource files used by later steps.

# ugp_id is a unique identifier for each project, in the format UGP_0001
ugp_id: UGP_0001

# ugp_version describes the version of the pipeline used
# NOTE(review): this template is titled 0.0.4 but the value below is 0.0.2 - confirm which is intended
ugp_version: 0.0.2

# Location of the md5 file used to check data integrity
# (<FILE> is a placeholder for the md5 file name)
md5: /home/srynearson/cApTUrE/Fastq/<FILE>

# The path to the reference genome fasta file (include the file name)
# NOTE(review): the directory is spelled "cAPTUrE" here and in known_indel /
# known_dbSNP below, but "cApTUrE" in the md5 path above - confirm the spelling,
# since it matters on case-sensitive filesystems
fasta: /home/srynearson/cAPTUrE/references/human_g1k_v37.fasta

# The path to the project fastq files.
# Include the trailing slash!
fastq_dir: /data/srynearson/Fastq/

# The name of the log file for command lines and pipeline info
command_log: cmd.log

# The number of CPUs available for Perl to process in parallel
# Matching the number of fastq files works best
cpu: 8

# The Xmx (memory usage) for Picard tools
# Max should be set to 125
# (units are not stated in this file; presumably gigabytes - TODO confirm)
picard_java_Xmx: 50

# The Xmx (memory usage) for GATK RealignerTargetCreator
gatk_tc_Xmx: 125

# The Xmx (memory usage) for GATK IndelRealigner
gatk_ir_Xmx: 125

# The Xmx (memory usage) for GATK BaseRecalibrator
gatk_br_Xmx: 125

# The Xmx (memory usage) for PrintReads
gatk_pr_Xmx: 125

# The Xmx (memory usage) for ReduceReads
gatk_rr_Xmx: 125

# The Xmx (memory usage) for UnifiedGenotyper
gatk_ug_Xmx: 125

# The path to the temp dir used by Java
java_tmp: /tmp/

# Path to VCF files for RealignerTargetCreator knowns
known_indel: /home/srynearson/cAPTUrE/background_files/known_indels
known_dbSNP: /home/srynearson/cAPTUrE/background_files/known_dbSNP

# Path to background bams used when running UnifiedGenotyper
# Not required to run, but will improve variant calls
unified_bg_bams: /data/srynearson/reducedreads/

# Resource files (including path) for VariantRecalibrator_SNP
# The key is repeated once per file.
# Order must match the VariantRecalibrator_SNP option below!
# (presumably the order of the `resource` entries in [VariantRecalibrator_SNP] - confirm)
vqsr_snp_vcf: /data/srynearson/resources/hapmap_3.3.b37.vcf
vqsr_snp_vcf: /data/srynearson/resources/1000G_omni2.5.b37.vcf
vqsr_snp_vcf: /data/srynearson/resources/1000G_phase1.snps.high_confidence.b37.vcf

# Resource files (including path) for VariantRecalibrator_INDEL
# The key is repeated once per file.
# Order must match the VariantRecalibrator_INDEL option below!
# (presumably the order of the `resource` entries in [VariantRecalibrator_INDEL] - confirm)
# NOTE(review): these keys are spelled "vsqr_" while the SNP keys above are
# spelled "vqsr_" - confirm which spelling the pipeline expects before renaming
vsqr_indel_vcf: /data/srynearson/resources/Mills_and_1000G_gold_standard.indels.b37.vcf
vsqr_indel_vcf: /data/srynearson/resources/dbsnp_137.b37.vcf

# Paths to the needed software (required).
# As written below, bwa, fastqc, picard and samtools point to install
# directories (with a trailing slash), while gatk points to the
# GenomeAnalysisTK.jar file itself.
[software]
bwa: /usr/local/bwa/
fastqc: /usr/local/FastQC/
picard: /usr/local/picard-tools/
samtools: /usr/local/samtools/
gatk: /usr/local/GenomeAnalysisTK-2.7-2/GenomeAnalysisTK.jar

# Pipeline run order.
# Each command_order entry names one pipeline step; the key is repeated once
# per step. Entries prefixed with # are commented out (disabled).
[order]
## Requires at least two commands to be used.
#command_order: indexer
command_order: fastqc
command_order: bwa_aln
command_order: bwa_sampe
command_order: idxstats
command_order: flagstat
command_order: MergeSamFiles
command_order: MarkDuplicates
command_order: RealignerTargetCreator
command_order: IndelRealigner
command_order: BaseRecalibrator
command_order: PrintReads
command_order: CollectMultipleMetrics
command_order: ReduceReads
command_order: UnifiedGenotyper
command_order: VariantRecalibrator
command_order: ApplyRecalibration

# VariantRecalibrator will run both the SNP and INDEL versions;
# they can be run independently if desired by using the entries below instead.
# (The four _SNP / _INDEL names match the section names later in this file.)
#command_order: VariantRecalibrator_SNP
#command_order: VariantRecalibrator_INDEL
#command_order: ApplyRecalibration_SNP
#command_order: ApplyRecalibration_INDEL

######################### FASTQC ############################
# http://www.bioinformatics.babraham.ac.uk/projects/fastqc/
#############################################################

[fastqc]
# Options for the fastqc step listed in [order].
# Key names appear to mirror FastQC command-line options (see the FastQC
# documentation) - confirm against the pipeline.
# Blank values are left unset; for booleans a blank means false.
outdir: 
extract: 
threads: 10
kmers: 
quiet: 

######################### BWA ############################### 
# http://bio-bwa.sourceforge.net/bwa.shtml
#############################################################

[bwa_index]
# Options for `bwa index`; keys correspond to its single-letter flags
# (see the bwa manual). Presumably used by the "indexer" step, which is
# commented out in [order] - confirm.
# a: index construction algorithm
p: 
a: bwtsw

[bwa_aln]
# Options for the bwa_aln step; keys correspond to the single-letter
# `bwa aln` flags (see the bwa manual).
# Keys are case-sensitive: o/O, n/N, e/E, i/I and b/B are distinct options,
# so the parser reading this file must preserve key case.
# Blank values are left unset (presumably the tool default applies - confirm).
n: 
o: 
e: 
d: 
i: 
l: 
k: 
# t: number of threads
t: 10
M: 
O: 
E: 
R: 
c: 
N: 
# q: quality threshold for read trimming
q: 18
I: 
B: 
b: 

[bwa_sampe]
# Options for the bwa_sampe step; keys correspond to the single-letter
# `bwa sampe` flags (see the bwa manual).
# Keys are case-sensitive: n and N are distinct options.
# P is a boolean flag, enabled here with TRUE per this file's convention.
a: 
o: 
P: TRUE
n: 
N: 
r: 

######################### Picard #########################
# http://picard.sourceforge.net
##########################################################

[MergeSamFiles]
# Options for the MergeSamFiles step listed in [order]; key names are
# written in Picard's UPPER_CASE option style.
# NOTE(review): booleans in this section are written "True", while the file
# header asks for "TRUE" - confirm that both spellings are accepted.
VALIDATION_STRINGENCY: SILENT
COMPRESSION_LEVEL: 
MAX_RECORDS_IN_RAM: 30000000
CREATE_INDEX: True
SORT_ORDER: coordinate
ASSUME_SORTED: True
MERGE_SEQUENCE_DICTIONARIES: 
USE_THREADING: True
COMMENT: 

[BuildBamIndex]
# Options for Picard BuildBamIndex.
# NOTE(review): no BuildBamIndex entry appears in [order] in this template -
# confirm whether this section is used.
VALIDATION_STRINGENCY: SILENT
COMPRESSION_LEVEL: 
MAX_RECORDS_IN_RAM: 
CREATE_INDEX: 

[CollectMultipleMetrics]
# Options for the CollectMultipleMetrics step listed in [order].
# PROGRAM selects which metrics program to run; only
# QualityScoreDistribution is requested here.
ASSUME_SORTED: 
VALIDATION_STRINGENCY: SILENT
PROGRAM: QualityScoreDistribution

[MarkDuplicates]
# Options for the MarkDuplicates step listed in [order]; key names are
# written in Picard's UPPER_CASE option style.
# REMOVE_DUPLICATES is blank (false under this file's boolean convention).
# NOTE(review): booleans in this section are written "True", while the file
# header asks for "TRUE" - confirm that both spellings are accepted.
VALIDATION_STRINGENCY: SILENT
COMPRESSION_LEVEL: 
MAX_RECORDS_IN_RAM: 
CREATE_INDEX: True
PROGRAM_RECORD_ID: 
PROGRAM_GROUP_VERSION: 
PROGRAM_GROUP_COMMAND_LINE: 
PROGRAM_GROUP_NAME: 
COMMENT: 
REMOVE_DUPLICATES: 
ASSUME_SORTED: True
MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP: 
MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: 
SORTING_COLLECTION_SIZE_RATIO: 
READ_NAME_REGEX: 
OPTICAL_DUPLICATE_PIXEL_DISTANCE: 

######################### GATK ###########################
# http://www.broadinstitute.org/gatk/
############################################################

[RealignerTargetCreator]
# Options for the RealignerTargetCreator step listed in [order].
# Blank values are left unset; for booleans a blank means false.
# inherited arguments
# (the same general list is repeated in every GATK section of this file)
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
# NOTE(review): 80 threads is far above cpu: 8 in [required] - confirm this
# matches the cores available on the target host
num_threads: 80
num_cpu_threads_per_data_thread: 
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

# specific arguments
# (options particular to RealignerTargetCreator)
maxIntervalSize: 
minReadsAtLocus: 
mismatchFraction: 
windowSize: 

[IndelRealigner]
# Options for the IndelRealigner step listed in [order].
# Every value is blank in this template, so nothing is set explicitly here.
# inherited arguments
# (the same general list is repeated in every GATK section of this file)
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
num_threads: 
num_cpu_threads_per_data_thread: 
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

# specific arguments
# (options particular to IndelRealigner)
consensusDeterminationModel: 
knownAllele: 
LODThresholdForCleaning: 
nWayOut: 
entropyThreshold: 
maxConsensuses: 
maxIsizeForMovement: 
maxPositionalMoveAllowed: 
maxReadsForConsensuses: 
maxReadsForRealignment: 
maxReadsInMemory: 
noOriginalAlignmentTags: 

[BaseRecalibrator]
# Options for the BaseRecalibrator step listed in [order].
# Blank values are left unset; for booleans a blank means false.
# Only num_cpu_threads_per_data_thread is set in this template.
# inherited arguments
# (the same general list is repeated in every GATK section of this file)
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
num_threads: 
num_cpu_threads_per_data_thread: 20
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

# specific arguments
# (options particular to BaseRecalibrator)
binary_tag_name: 
covariate: 
deletions_default_quality: 
indels_context_size: 
insertions_default_quality: 
low_quality_tail: 
lowMemoryMode: 
maximum_cycle_value: 
mismatches_context_size: 
mismatches_default_quality: 
no_standard_covs: 
quantizing_levels: 
solid_nocall_strategy: 
solid_recal_mode: 
sort_by_all_columns: 

[PrintReads]
# Options for the PrintReads step listed in [order].
# Blank values are left unset; for booleans a blank means false.
# NOTE(review): unlike most other GATK sections in this file, no num_threads
# key is listed here - presumably because PrintReads does not support it;
# confirm before adding one.
# inherited arguments
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
num_cpu_threads_per_data_thread: 20
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

# specific arguments
# (options particular to PrintReads)
downsample_coverage: 
number: 
platform: 
readGroup: 
sample_file: 
sample_name: 
simplify: 

[ReduceReads]
# Options for the ReduceReads step listed in [order].
# Every value is blank in this template, so nothing is set explicitly here.
# NOTE(review): neither num_threads nor num_cpu_threads_per_data_thread is
# listed in this section - presumably because ReduceReads does not support
# them; confirm before adding either.
# inherited arguments
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

# specific arguments
# (options particular to ReduceReads)
cancer_mode: 
context_size: 
dont_compress_read_names: 
dont_hardclip_low_qual_tails: 
dont_simplify_reads: 
dont_use_softclipped_bases: 
downsample_coverage: 
hard_clip_to_interval: 
known: 
mindel: 
minimum_mapping_quality: 
minimum_tail_qualities: 
minqual: 
noclip_ad: 
out: 
min_pvalue: 
minvar: 

[UnifiedGenotyper]
# Options for the UnifiedGenotyper step listed in [order].
# Blank values are left unset; for booleans a blank means false.
# inherited arguments
# (the same general list is repeated in every GATK section of this file)
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
# NOTE(review): both threading options are set; if they multiply (8 x 10)
# the total is well above cpu: 8 in [required] - confirm against host cores
num_threads: 8
num_cpu_threads_per_data_thread: 10
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

# specific arguments
# (options particular to UnifiedGenotyper; this template sets the
# likelihoods model to BOTH, both confidence thresholds to 30.0, and
# output_mode to EMIT_VARIANTS_ONLY)
alleles: 
comp: 
dbsnp: 
annotation: 
contamination_fraction_per_sample_file: 
min_indel_fraction_per_sample: 
excludeAnnotation: 
genotype_likelihoods_model: BOTH
genotyping_mode: 
group: 
heterozygosity: 
indel_heterozygosity: 
max_deletion_fraction: 
min_base_quality_score: 
min_indel_count_for_genotyping: 
pair_hmm_implementation: 
pcr_error_rate: 
sample_ploidy: 
standard_min_confidence_threshold_for_calling: 30.0
standard_min_confidence_threshold_for_emitting: 30.0
output_mode: EMIT_VARIANTS_ONLY
annotateNDA: 
computeSLOD: 
indelGapContinuationPenalty: 
indelGapOpenPenalty: 
input_prior: 
max_alternate_alleles: 
onlyEmitSamples: 
allSitePLs: 

[VariantRecalibrator_SNP]
## Options for the SNP pass of VariantRecalibrator.
## Blank values are left unset; for booleans a blank means false.
## inherited arguments
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
num_threads: 40
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

## specific arguments
## required
## resource entries (hapmap, omni, 1000G) are listed in the same order as the
## vqsr_snp_vcf files in [required]; keep the two lists in step.
## resource and use_annotation are repeated keys, one entry per line.
resource: hapmap,known=false,training=true,truth=true,prior=15.0
resource: omni,known=false,training=true,truth=false,prior=12.0
resource: 1000G,known=false,training=true,truth=false,prior=10.0
use_annotation: QD
use_annotation: HaplotypeScore
use_annotation: MQRankSum
use_annotation: ReadPosRankSum
use_annotation: FS
use_annotation: MQ
#use_annotation : InbreedingCoeff
## optional
dirichlet: 
ignore_filter: 
maxGaussians: 
maxIterations: 
numBadVariants: 1000
numKMeans: 
priorCounts: 
qualThreshold: 
shrinkage: 
stdThreshold: 
target_titv: 
ts_filter_level: 
TStranche: 
maxNegativeGaussians: 
trustAllPolymorphic: 

[VariantRecalibrator_INDEL]
## Options for the INDEL pass of VariantRecalibrator.
## Blank values are left unset; for booleans a blank means false.
## inherited arguments
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
num_threads: 40
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

## specific arguments
## required
## resource entries (mills, dbsnp) are listed in the same order as the
## vsqr_indel_vcf files in [required]; keep the two lists in step.
## resource and use_annotation are repeated keys, one entry per line.
resource: mills,known=false,training=true,truth=true,prior=12.0
resource: dbsnp,known=true,training=false,truth=false,prior=2.0
use_annotation: MQRankSum
use_annotation: ReadPosRankSum
use_annotation: FS
use_annotation: DP
## optional
dirichlet: 
ignore_filter: 
maxGaussians: 4
maxIterations: 
numBadVariants: 1000
numKMeans: 
priorCounts: 
qualThreshold: 
shrinkage: 
stdThreshold: 
target_titv: 
ts_filter_level: 
TStranche: 
maxNegativeGaussians: 
trustAllPolymorphic: 

[ApplyRecalibration_SNP]
## Options for the SNP pass of ApplyRecalibration.
## Blank values are left unset; for booleans a blank means false.
## inherited arguments
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
num_threads: 40
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

## specific arguments
## ts_filter_level is the only tool-specific option set for this pass
ts_filter_level: 99.0

[ApplyRecalibration_INDEL]
## Options for the INDEL pass of ApplyRecalibration.
## Blank values are left unset; for booleans a blank means false.
## inherited arguments
analysis_type: 
baq: 
baqGapOpenPenalty: 
BQSR: 
defaultBaseQualities: 
disable_indel_quals: 
downsample_to_coverage: 
downsample_to_fraction: 
downsampling_type: 
emit_original_quals: 
excludeIntervals: 
gatk_key: 
globalQScorePrior: 
interval_merging: 
interval_padding: 
interval_set_rule: 
intervals: 
keep_program_records: 
log_to_file: 
logging_level: 
maxRuntime: 
maxRuntimeUnits: 
monitorThreadEfficiency: 
nonDeterministicRandomSeed: 
num_bam_file_handles: 
num_threads: 40
pedigree: 
pedigreeString: 
pedigreeValidationType: 
performanceLog: 
phone_home: 
preserve_qscores_less_than: 
read_buffer_size: 
read_filter: 
read_group_black_list: 
reference_sequence: 
remove_program_records: 
tag: 
unsafe: 
useOriginalQualities: 
validation_strictness: 
allowBqsrOnReducedBams: 

## specific arguments
## ts_filter_level is the only tool-specific option set for this pass
ts_filter_level: 99.0