UGP Pipeline Config Template 0.0.4
Jump to navigation
Jump to search
############################################################# ############################################################# ################## cApTUrE config file ##################### ############################################################# ############################################################# ## Pipeline uses Illumina file naming conventions. This naming convention is described here: ## http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm ## The sample name and lane information of the fastq file are the most important parts for the pipeline to work correctly. ## Boolean options must be specified as TRUE (false values as blanks). [required] # ugp_id is a unique identifier for each project in the format UGP_0001 ugp_id: UGP_0001 # ugp_version describes the version of the pipeline used ugp_version: 0.0.2 # location of md5 file to check data integrity md5: /home/srynearson/cApTUrE/Fastq/<FILE> # The path to the reference genome fasta file (include file) fasta: /home/srynearson/cAPTUrE/references/human_g1k_v37.fasta # The path to the project fastq files. # include trailing slash! 
fastq_dir: /data/srynearson/Fastq/ # The name of the command line and pipeline info log file command_log: cmd.log # The number of CPUs available for Perl to parallel process # Matching the number of fastq files works best cpu: 8 # The Xmx (memory usage) for Picard tools # max should be set to 125 picard_java_Xmx: 50 # The Xmx (memory usage) for GATK RealignerTargetCreator gatk_tc_Xmx: 125 # The Xmx (memory usage) for GATK IndelRealigner gatk_ir_Xmx: 125 # The Xmx (memory usage) for GATK BaseRecalibrator gatk_br_Xmx: 125 # The Xmx (memory usage) for PrintReads gatk_pr_Xmx: 125 # The Xmx (memory usage) for ReduceReads gatk_rr_Xmx: 125 # The Xmx (memory usage) for UnifiedGenotyper gatk_ug_Xmx: 125 # The path to temp dir used by java java_tmp: /tmp/ # Path to VCF files for RealignerTargetCreator knowns known_indel: /home/srynearson/cAPTUrE/background_files/known_indels known_dbSNP: /home/srynearson/cAPTUrE/background_files/known_dbSNP # Path to background bams used running UnifiedGenotyper # not required to run but will improve variant calls unified_bg_bams: /data/srynearson/reducedreads/ # Resource files (including path) for VariantRecalibrator_SNP # Order must match VariantRecalibrator_SNP option below! vqsr_snp_vcf: /data/srynearson/resources/hapmap_3.3.b37.vcf vqsr_snp_vcf: /data/srynearson/resources/1000G_omni2.5.b37.vcf vqsr_snp_vcf: /data/srynearson/resources/1000G_phase1.snps.high_confidence.b37.vcf # Resource files (including path) for VariantRecalibrator_INDEL # Order must match VariantRecalibrator_INDEL option below! vsqr_indel_vcf: /data/srynearson/resources/Mills_and_1000G_gold_standard.indels.b37.vcf vsqr_indel_vcf: /data/srynearson/resources/dbsnp_137.b37.vcf # path to needed software (required). [software] bwa: /usr/local/bwa/ fastqc: /usr/local/FastQC/ picard: /usr/local/picard-tools/ samtools: /usr/local/samtools/ gatk: /usr/local/GenomeAnalysisTK-2.7-2/GenomeAnalysisTK.jar # Pipeline Run Order. 
[order] ## requires at least two commands to be used #command_order : indexer command_order: fastqc command_order: bwa_aln command_order: bwa_sampe command_order: idxstats command_order: flagstat command_order: MergeSamFiles command_order: MarkDuplicates command_order: RealignerTargetCreator command_order: IndelRealigner command_order: BaseRecalibrator command_order: PrintReads command_order: CollectMultipleMetrics command_order: ReduceReads command_order: UnifiedGenotyper command_order: VariantRecalibrator command_order: ApplyRecalibration # VariantRecalibrator will run both SNP and INDEL versions # can be run independently if desired. #command_order : VariantRecalibrator_SNP #command_order : VariantRecalibrator_INDEL #command_order : ApplyRecalibration_SNP #command_order : ApplyRecalibration_INDEL ######################### FASTQC ############################ # http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ ############################################################# [fastqc] outdir: extract: threads: 10 kmers: quiet: ######################### BWA ############################### # http://bio-bwa.sourceforge.net/bwa.shtml ############################################################# [bwa_index] p: a: bwtsw [bwa_aln] n: o: e: d: i: l: k: t: 10 M: O: E: R: c: N: q: 18 I: B: b: [bwa_sampe] a: o: P: TRUE n: N: r: ######################### Picard ######################### # http://picard.sourceforge.net ########################################################## [MergeSamFiles] VALIDATION_STRINGENCY: SILENT COMPRESSION_LEVEL: MAX_RECORDS_IN_RAM: 30000000 CREATE_INDEX: True SORT_ORDER: coordinate ASSUME_SORTED: True MERGE_SEQUENCE_DICTIONARIES: USE_THREADING: True COMMENT: [BuildBamIndex] VALIDATION_STRINGENCY: SILENT COMPRESSION_LEVEL: MAX_RECORDS_IN_RAM: CREATE_INDEX: [CollectMultipleMetrics] ASSUME_SORTED: VALIDATION_STRINGENCY: SILENT PROGRAM: QualityScoreDistribution [MarkDuplicates] VALIDATION_STRINGENCY: SILENT COMPRESSION_LEVEL: MAX_RECORDS_IN_RAM: 
CREATE_INDEX: True PROGRAM_RECORD_ID: PROGRAM_GROUP_VERSION: PROGRAM_GROUP_COMMAND_LINE: PROGRAM_GROUP_NAME: COMMENT: REMOVE_DUPLICATES: ASSUME_SORTED: True MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP: MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: SORTING_COLLECTION_SIZE_RATIO: READ_NAME_REGEX: OPTICAL_DUPLICATE_PIXEL_DISTANCE: ######################### GATK ########################### # http://www.broadinstitute.org/gatk/ ############################################################ [RealignerTargetCreator] # inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: 80 num_cpu_threads_per_data_thread: pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: # specific arguments maxIntervalSize: minReadsAtLocus: mismatchFraction: windowSize: [IndelRealigner] # inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: num_cpu_threads_per_data_thread: pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: 
preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: # specific arguments consensusDeterminationModel: knownAllele: LODThresholdForCleaning: nWayOut: entropyThreshold: maxConsensuses: maxIsizeForMovement: maxPositionalMoveAllowed: maxReadsForConsensuses: maxReadsForRealignment: maxReadsInMemory: noOriginalAlignmentTags: [BaseRecalibrator] # inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: num_cpu_threads_per_data_thread: 20 pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: # specific arguments binary_tag_name: covariate: deletions_default_quality: indels_context_size: insertions_default_quality: low_quality_tail: lowMemoryMode: maximum_cycle_value: mismatches_context_size: mismatches_default_quality: no_standard_covs: quantizing_levels: solid_nocall_strategy: solid_recal_mode: sort_by_all_columns: [PrintReads] # inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: 
logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_cpu_threads_per_data_thread: 20 pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: # specific arguments downsample_coverage: number: platform: readGroup: sample_file: sample_name: simplify: [ReduceReads] # inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: # specific arguments cancer_mode: context_size: dont_compress_read_names: dont_hardclip_low_qual_tails: dont_simplify_reads: dont_use_softclipped_bases: downsample_coverage: hard_clip_to_interval: known: mindel: minimum_mapping_quality: minimum_tail_qualities: minqual: noclip_ad: out: min_pvalue: minvar: [UnifiedGenotyper] # inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: 
maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: 8 num_cpu_threads_per_data_thread: 10 pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: # specific arguments alleles: comp: dbsnp: annotation: contamination_fraction_per_sample_file: min_indel_fraction_per_sample: excludeAnnotation: genotype_likelihoods_model: BOTH genotyping_mode: group: heterozygosity: indel_heterozygosity: max_deletion_fraction: min_base_quality_score: min_indel_count_for_genotyping: pair_hmm_implementation: pcr_error_rate: sample_ploidy: standard_min_confidence_threshold_for_calling: 30.0 standard_min_confidence_threshold_for_emitting: 30.0 output_mode: EMIT_VARIANTS_ONLY annotateNDA: computeSLOD: indelGapContinuationPenalty: indelGapOpenPenalty: input_prior: max_alternate_alleles: onlyEmitSamples: allSitePLs: [VariantRecalibrator_SNP] ## inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: 40 pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: ## specific arguments ## required resource: hapmap,known=false,training=true,truth=true,prior=15.0 resource: 
omni,known=false,training=true,truth=false,prior=12.0 resource: 1000G,known=false,training=true,truth=false,prior=10.0 use_annotation: QD use_annotation: HaplotypeScore use_annotation: MQRankSum use_annotation: ReadPosRankSum use_annotation: FS use_annotation: MQ #use_annotation : InbreedingCoeff ## optional dirichlet: ignore_filter: maxGaussians: maxIterations: numBadVariants: 1000 numKMeans: priorCounts: qualThreshold: shrinkage: stdThreshold: target_titv: ts_filter_level: TStranche: maxNegativeGaussians: trustAllPolymorphic: [VariantRecalibrator_INDEL] ## inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: 40 pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: ## specific arguments ## required resource: mills,known=false,training=true,truth=true,prior=12.0 resource: dbsnp,known=true,training=false,truth=false,prior=2.0 use_annotation: MQRankSum use_annotation: ReadPosRankSum use_annotation: FS use_annotation: DP ## optional dirichlet: ignore_filter: maxGaussians: 4 maxIterations: numBadVariants: 1000 numKMeans: priorCounts: qualThreshold: shrinkage: stdThreshold: target_titv: ts_filter_level: TStranche: maxNegativeGaussians: trustAllPolymorphic: [ApplyRecalibration_SNP] ## inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: 
downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: 40 pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: ## specific arguments ts_filter_level: 99.0 [ApplyRecalibration_INDEL] ## inherited arguments analysis_type: baq: baqGapOpenPenalty: BQSR: defaultBaseQualities: disable_indel_quals: downsample_to_coverage: downsample_to_fraction: downsampling_type: emit_original_quals: excludeIntervals: gatk_key: globalQScorePrior: interval_merging: interval_padding: interval_set_rule: intervals: keep_program_records: log_to_file: logging_level: maxRuntime: maxRuntimeUnits: monitorThreadEfficiency: nonDeterministicRandomSeed: num_bam_file_handles: num_threads: 40 pedigree: pedigreeString: pedigreeValidationType: performanceLog: phone_home: preserve_qscores_less_than: read_buffer_size: read_filter: read_group_black_list: reference_sequence: remove_program_records: tag: unsafe: useOriginalQualities: validation_strictness: allowBqsrOnReducedBams: ## specific arguments ts_filter_level: 99.0