UGP Pipeline Config Template 0.0.4
Jump to navigation
Jump to search
############################################################# ############################################################# ################## cApTUrE config file ##################### ############################################################# ############################################################# ## Pipeline uses Illumina file naming conventions. This naming convention is described here: ## http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm ## The sample name and lane information of the fastq file are the most important parts for the pipeline to work correctly. ## Boolean options must be specified as TRUE (false values as blanks). [required] # ugp_id is a unique identifier for each project in the format UGP_0001 ugp_id : UGP_0003 # ugp_version describes the version of the pipeline used ugp_version : 0.0.2 # location of md5 file to check data integrity md5 : /home/srynearson/cApTUrE/Fastq/<FILE> # The path to the reference genome fasta file (include file) fasta : /home/srynearson/cAPTUrE/references/human_g1k_v37.fasta # The path to the project fastq files. # include trailing slash! fastq_dir : /data/srynearson/Fastq/ # The name of the command line and pipeline info log file command_log : cmd.log # The number of CPUs available for Perl to parallel process # Matching the number of fastq files works best cpu : 8 # memory usage in GB. 
# The Xmx (memory usage) for picard picard_java_Xmx : 250 # The Xmx (memory usage) for GATK RealignerTargetCreator gatk_tc_Xmx : 15 # The Xmx (memory usage) for GATK IndelRealigner gatk_ir_Xmx : 250 # The Xmx (memory usage) for GATK BaseRecalibrator gatk_br_Xmx : 250 # The Xmx (memory usage) for PrintReads gatk_pr_Xmx : 250 # The Xmx (memory usage) for ReduceReads gatk_rr_Xmx : 250 # The Xmx (memory usage) for UnifiedGenotyper gatk_ug_Xmx : 200 # The path to temp dir used by java java_tmp : /tmp/ # Path to VCF files for RealignerTargetCreator knowns known_indel : /home/srynearson/cAPTUrE/background_files/known_indels known_dbSNP : /home/srynearson/cAPTUrE/background_files/known_dbSNP # Path to background bams used running UnifiedGenotyper # not required to run but will improve variant calls unified_bg_bams : /data/srynearson/reducedreads/ # Resource files (including path) for VariantRecalibrator_SNP # Order must match VariantRecalibrator_SNP option below! vqsr_snp_vcf: /data/srynearson/resources/hapmap_3.3.b37.vcf vqsr_snp_vcf: /data/srynearson/resources/1000G_omni2.5.b37.vcf vqsr_snp_vcf: /data/srynearson/resources/1000G_phase1.snps.high_confidence.b37.vcf # Resource files (including path) for VariantRecalibrator_INDEL # Order must match VariantRecalibrator_INDEL option below! vsqr_indel_vcf : /data/srynearson/resources/Mills_and_1000G_gold_standard.indels.b37.vcf vsqr_indel_vcf : /data/srynearson/resources/dbsnp_137.b37.vcf # path to needed software(required). [software] bwa : /usr/local/bwa/ fastqc : /usr/local/FastQC/ picard : /usr/local/picard-tools/ samtools : /usr/local/samtools/ gatk : /usr/local/GenomeAnalysisTK-2.7-2/GenomeAnalysisTK.jar # Pipeline Run Order. 
[order] ## requires at least two commands to be used #command_order : indexer command_order : fastqc command_order : bwa_aln command_order : bwa_sampe command_order : idxstats command_order : flagstat command_order : MergeSamFiles command_order : MarkDuplicates command_order : RealignerTargetCreator command_order : IndelRealigner command_order : BaseRecalibrator command_order : PrintReads command_order : CollectMultipleMetrics command_order : ReduceReads command_order : UnifiedGenotyper command_order : VariantRecalibrator command_order : ApplyRecalibration # VariantRecalibrator will run both SNP and INDEL versions # can be run independently if desired. #command_order : VariantRecalibrator_SNP #command_order : VariantRecalibrator_INDEL #command_order : ApplyRecalibration_SNP #command_order : ApplyRecalibration_INDEL ######################### FASTQC ############################ # http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ ############################################################# [fastqc] outdir : extract : threads : 10 kmers : quiet : ######################### BWA ############################### # http://bio-bwa.sourceforge.net/bwa.shtml ############################################################# [bwa_index] p : a : bwtsw [bwa_aln] n : o : e : d : i : l : k : t : 10 M : O : E : R : c : N : q : 18 I : B : b : [bwa_sampe] a : o : P : TRUE n : N : r : ######################### Picard ######################### # http://picard.sourceforge.net ########################################################## [MergeSamFiles] VALIDATION_STRINGENCY : SILENT COMPRESSION_LEVEL : MAX_RECORDS_IN_RAM : 30000000 CREATE_INDEX : True SORT_ORDER : coordinate ASSUME_SORTED : True MERGE_SEQUENCE_DICTIONARIES : USE_THREADING : True COMMENT : [BuildBamIndex] VALIDATION_STRINGENCY : SILENT COMPRESSION_LEVEL : MAX_RECORDS_IN_RAM : CREATE_INDEX : [CollectMultipleMetrics] ASSUME_SORTED : VALIDATION_STRINGENCY : SILENT PROGRAM : QualityScoreDistribution [MarkDuplicates] 
VALIDATION_STRINGENCY : SILENT COMPRESSION_LEVEL : MAX_RECORDS_IN_RAM : CREATE_INDEX : True PROGRAM_RECORD_ID : PROGRAM_GROUP_VERSION : PROGRAM_GROUP_COMMAND_LINE : PROGRAM_GROUP_NAME : COMMENT : REMOVE_DUPLICATES : ASSUME_SORTED : True MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP : MAX_FILE_HANDLES_FOR_READ_ENDS_MAP : SORTING_COLLECTION_SIZE_RATIO : READ_NAME_REGEX : OPTICAL_DUPLICATE_PIXEL_DISTANCE : ######################### GATK ########################### # http://www.broadinstitute.org/gatk/ ############################################################ [RealignerTargetCreator] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : 70 num_cpu_threads_per_data_thread : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments maxIntervalSize : minReadsAtLocus : mismatchFraction : windowSize : [IndelRealigner] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : 
num_bam_file_handles : num_threads : num_cpu_threads_per_data_thread : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments consensusDeterminationModel : knownAllele : LODThresholdForCleaning : nWayOut : entropyThreshold : maxConsensuses : maxIsizeForMovement : maxPositionalMoveAllowed : maxReadsForConsensuses : maxReadsForRealignment : maxReadsInMemory : noOriginalAlignmentTags : [BaseRecalibrator] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : num_cpu_threads_per_data_thread : 20 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments binary_tag_name : covariate : deletions_default_quality : indels_context_size : insertions_default_quality : low_quality_tail : lowMemoryMode : maximum_cycle_value : mismatches_context_size : mismatches_default_quality : no_standard_covs : quantizing_levels : solid_nocall_strategy : solid_recal_mode : sort_by_all_columns : [PrintReads] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : 
downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_cpu_threads_per_data_thread : 20 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments downsample_coverage : number : platform : readGroup : sample_file : sample_name : simplify : [ReduceReads] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments cancer_mode : context_size : dont_compress_read_names : dont_hardclip_low_qual_tails : dont_simplify_reads : dont_use_softclipped_bases : downsample_coverage : hard_clip_to_interval : known : mindel : minimum_mapping_quality : minimum_tail_qualities : minqual : noclip_ad : out : min_pvalue : minvar : [UnifiedGenotyper] # inherited arguments 
analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : 20 num_cpu_threads_per_data_thread : 4 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments alleles : comp : dbsnp : annotation : contamination_fraction_per_sample_file : min_indel_fraction_per_sample : excludeAnnotation : genotype_likelihoods_model : BOTH genotyping_mode : group : heterozygosity : indel_heterozygosity : max_deletion_fraction : min_base_quality_score : min_indel_count_for_genotyping : pair_hmm_implementation : pcr_error_rate : sample_ploidy : standard_min_confidence_threshold_for_calling : 30.0 standard_min_confidence_threshold_for_emitting : 30.0 output_mode : EMIT_VARIANTS_ONLY annotateNDA : computeSLOD : indelGapContinuationPenalty : indelGapOpenPenalty : input_prior : max_alternate_alleles : onlyEmitSamples : allSitePLs : [VariantRecalibrator_SNP] ## inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : 
num_bam_file_handles : num_threads : 40 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : ## specific arguments ## required resource : hapmap,known=false,training=true,truth=true,prior=15.0 resource : omni,known=false,training=true,truth=false,prior=12.0 resource : 1000G,known=false,training=true,truth=false,prior=10.0 use_annotation : QD use_annotation : HaplotypeScore use_annotation : MQRankSum use_annotation : ReadPosRankSum use_annotation : FS use_annotation : MQ #use_annotation : InbreedingCoeff ## optional dirichlet : ignore_filter : maxGaussians : maxIterations : numBadVariants : 1000 numKMeans : priorCounts : qualThreshold : shrinkage : stdThreshold : target_titv : ts_filter_level : TStranche : maxNegativeGaussians : trustAllPolymorphic : [VariantRecalibrator_INDEL] ## inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : 40 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : ## specific arguments ## required resource : mills,known=false,training=true,truth=true,prior=12.0 resource : 
dbsnp,known=true,training=false,truth=false,prior=2.0 use_annotation : MQRankSum use_annotation : ReadPosRankSum use_annotation : FS use_annotation : DP ## optional dirichlet : ignore_filter : maxGaussians : 4 maxIterations : numBadVariants : 1000 numKMeans : priorCounts : qualThreshold : shrinkage : stdThreshold : target_titv : ts_filter_level : TStranche : maxNegativeGaussians : trustAllPolymorphic : [ApplyRecalibration_SNP] ## inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : 40 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : ## specific arguments ts_filter_level : 99.0 [ApplyRecalibration_INDEL] ## inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : 40 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : 
reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : ## specific arguments ts_filter_level : 99.0