Difference between revisions of "UGP Pipeline Config Template 0.0.2"
Jump to navigation
Jump to search
(Created page with "<pre> ############################################################# # REQUIRED OPTIONS ############################################################# # http://support.illumin...") |
|||
(2 intermediate revisions by the same user not shown) | |||
Line 5: | Line 5: | ||
############################################################# | ############################################################# | ||
+ | # Pipeline uses Illumina file naming conventions. This naming convention is described here: | ||
# http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm | # http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm | ||
+ | [argv] | ||
+ | |||
+ | # ugp_id is a unique identifier for each project in the format UGP_0001 | ||
+ | ugp_id : UGP_0001 | ||
+ | |||
+ | # ugp_version describes the version of the pipeline used | ||
+ | ugp_version : 0.0.2 | ||
+ | |||
+ | # ugp_name is a human readable name for the analysis (alphanumeric characters and underscores only - no spaces). | ||
+ | ugp_name : Shawn_Genome | ||
+ | |||
+ | # The location of the reference genome fasta file | ||
+ | fasta : /home/srynearson/UGP_Pipeline/data/reference/human_g1k_v37.fasta | ||
+ | |||
+ | # The path to the project fastq files | ||
+ | fastq_dir : /home/srynearson/UGP_Pipeline/data/SGF65/ | ||
+ | |||
+ | # The name of the error log file | ||
+ | error_log : error.log | ||
+ | |||
+ | # The name of the command line log file | ||
+ | cmd_log : cmd.log | ||
+ | |||
+ | # The number of CPUs available for analysis | ||
+ | cpu : 10 | ||
+ | |||
+ | # Available RAM in GB | ||
+ | ram : 128 | ||
− | + | # The Xmx (memory usage) argument given to Java | |
− | + | java_Xmx : 8 | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | java_Xmx | ||
− | |||
+ | # The path to temp files used by java | ||
+ | java_tmp : /tmp/ | ||
# Path to VCF files for realignment | # Path to VCF files for realignment | ||
Line 32: | Line 56: | ||
picard : /usr/local/picard-tools-1.90/ | picard : /usr/local/picard-tools-1.90/ | ||
gatk : /usr/local/GenomeAnalysisTK-2.6-5/GenomeAnalysisTK.jar | gatk : /usr/local/GenomeAnalysisTK-2.6-5/GenomeAnalysisTK.jar | ||
+ | |||
+ | # Pipeline Order | ||
+ | |||
+ | [order] | ||
+ | |||
+ | command_order : fastqc | ||
+ | command_order : bwa_index | ||
+ | command_order : bwa_aln | ||
+ | command_order : bwa_sampe | ||
+ | # sorting | ||
+ | command_order : idxstats | ||
+ | command_order : flagstat | ||
+ | command_order : MergeSamFiles | ||
+ | command_order : MarkDuplicates | ||
+ | command_order : RealignerTargetCreator | ||
+ | command_order : IndelRealigner | ||
+ | command_order : BaseRecalibrator | ||
+ | command_order : PrintReads | ||
+ | command_order : CollectMultipleMetrics | ||
+ | command_order : ReduceReads | ||
+ | command_order : UnifiedGenotyper | ||
+ | command_order : VariantRecalibrator | ||
######################### FASTQC ############################ | ######################### FASTQC ############################ | ||
− | # http:// | + | # http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ |
############################################################# | ############################################################# | ||
Line 51: | Line 97: | ||
[bwa_index] | [bwa_index] | ||
p : | p : | ||
− | a : | + | a : bwtsw |
[bwa_aln] | [bwa_aln] | ||
Line 80: | Line 126: | ||
N : | N : | ||
r : | r : | ||
− | |||
######################### Picard ######################### | ######################### Picard ######################### | ||
# http://picard.sourceforge.net | # http://picard.sourceforge.net | ||
########################################################## | ########################################################## | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
[MergeSamFiles] | [MergeSamFiles] | ||
− | VALIDATION_STRINGENCY : | + | VALIDATION_STRINGENCY : LENIENT |
COMPRESSION_LEVEL : | COMPRESSION_LEVEL : | ||
MAX_RECORDS_IN_RAM : 30000000 | MAX_RECORDS_IN_RAM : 30000000 | ||
Line 109: | Line 141: | ||
USE_THREADING : true | USE_THREADING : true | ||
COMMENT : | COMMENT : | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
[SortSam] | [SortSam] | ||
Line 121: | Line 146: | ||
COMPRESSION_LEVEL : | COMPRESSION_LEVEL : | ||
MAX_RECORDS_IN_RAM : | MAX_RECORDS_IN_RAM : | ||
− | CREATE_INDEX : | + | CREATE_INDEX : TRUE |
SORT_ORDER : coordinate | SORT_ORDER : coordinate | ||
Line 131: | Line 156: | ||
[MarkDuplicates] | [MarkDuplicates] | ||
− | VALIDATION_STRINGENCY : | + | VALIDATION_STRINGENCY : LENIENT |
COMPRESSION_LEVEL : | COMPRESSION_LEVEL : | ||
MAX_RECORDS_IN_RAM : | MAX_RECORDS_IN_RAM : | ||
Line 148: | Line 173: | ||
OPTICAL_DUPLICATE_PIXEL_DISTANCE : | OPTICAL_DUPLICATE_PIXEL_DISTANCE : | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
######################### GATK ########################### | ######################### GATK ########################### | ||
Line 282: | Line 225: | ||
# specific arguments | # specific arguments | ||
alleles : | alleles : | ||
− | annotateNDA : | + | annotateNDA : |
computeSLOD : | computeSLOD : | ||
dbsnp : | dbsnp : | ||
excludeAnnotation : | excludeAnnotation : | ||
− | genotype_likelihoods_model : | + | genotype_likelihoods_model : |
genotyping_mode : | genotyping_mode : | ||
group : | group : | ||
Line 717: | Line 660: | ||
[UnifiedGenotyper] | [UnifiedGenotyper] | ||
# inherited arguments | # inherited arguments | ||
− | analysis_type : | + | analysis_type : |
baq : | baq : | ||
baqGapOpenPenalty : | baqGapOpenPenalty : |
Latest revision as of 19:40, 27 August 2013
############################################################# # REQUIRED OPTIONS ############################################################# # Pipeline uses Illumina file naming conventions. This naming convention is described here: # http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm [argv] # ugp_id is a unique identifier for each project in the format UGP_0001 ugp_id : UGP_0001 # ugp_version describes the version of the pipeline used ugp_version : 0.0.2 # ugp_name is a human readable name for the analysis (alphanumeric characters and underscores only - no spaces). ugp_name : Shawn_Genome # The location of the reference genome fasta file fasta : /home/srynearson/UGP_Pipeline/data/reference/human_g1k_v37.fasta # The path to the project fastq files fastq_dir : /home/srynearson/UGP_Pipeline/data/SGF65/ # The name of the error log file error_log : error.log # The name of the command line log file cmd_log : cmd.log # The number of CPUs available for analysis cpu : 10 # Available RAM in GB ram : 128 # The Xmx (memory usage) argument given to Java java_Xmx : 8 # The path to temp files used by java java_tmp : /tmp/ # Path to VCF files for realignment known_indel : /home/srynearson/UGP_Pipeline/data/known_indels/ known_dbSNP : /home/srynearson/UGP_Pipeline/data/known_dbSNP/ # Path to background for Unified variant calling unified_background : # Software paths bwa : /usr/local/bwa/ fastqc : /usr/local/FastQC/ samtools : /usr/local/samtools/ picard : /usr/local/picard-tools-1.90/ gatk : /usr/local/GenomeAnalysisTK-2.6-5/GenomeAnalysisTK.jar # Pipeline Order [order] command_order : fastqc command_order : bwa_index command_order : bwa_aln command_order : bwa_sampe # sorting command_order : idxstats command_order : flagstat command_order : MergeSamFiles command_order : MarkDuplicates command_order : RealignerTargetCreator command_order : IndelRealigner command_order : BaseRecalibrator 
command_order : PrintReads command_order : CollectMultipleMetrics command_order : ReduceReads command_order : UnifiedGenotyper command_order : VariantRecalibrator ######################### FASTQC ############################ # http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ ############################################################# [fastqc] outdir : extract : threads : 4 kmers : quiet : ######################### BWA ############################### # http://bio-bwa.sourceforge.net/bwa.shtml ############################################################# [bwa_index] p : a : bwtsw [bwa_aln] n : o : e : d : i : l : k : t : 10 M : O : E : R : c : N : q : 15 I : B : b : [bwa_sampe] a : o : P : TRUE n : N : r : ######################### Picard ######################### # http://picard.sourceforge.net ########################################################## [MergeSamFiles] VALIDATION_STRINGENCY : LENIENT COMPRESSION_LEVEL : MAX_RECORDS_IN_RAM : 30000000 CREATE_INDEX : true SORT_ORDER : coordinate ASSUME_SORTED : true MERGE_SEQUENCE_DICTIONARIES : USE_THREADING : true COMMENT : [SortSam] VALIDATION_STRINGENCY : LENIENT COMPRESSION_LEVEL : MAX_RECORDS_IN_RAM : CREATE_INDEX : TRUE SORT_ORDER : coordinate [BuildBamIndex] VALIDATION_STRINGENCY : COMPRESSION_LEVEL : MAX_RECORDS_IN_RAM : CREATE_INDEX : [MarkDuplicates] VALIDATION_STRINGENCY : LENIENT COMPRESSION_LEVEL : MAX_RECORDS_IN_RAM : CREATE_INDEX : PROGRAM_RECORD_ID : PROGRAM_GROUP_VERSION : PROGRAM_GROUP_COMMAND_LINE : PROGRAM_GROUP_NAME : COMMENT : REMOVE_DUPLICATES : ASSUME_SORTED : True MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP : MAX_FILE_HANDLES_FOR_READ_ENDS_MAP : SORTING_COLLECTION_SIZE_RATIO : READ_NAME_REGEX : OPTICAL_DUPLICATE_PIXEL_DISTANCE : ######################### GATK ########################### # http://www.broadinstitute.org/gatk/ ############################################################ [UnifiedGenotyper] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : 
defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments alleles : annotateNDA : computeSLOD : dbsnp : excludeAnnotation : genotype_likelihoods_model : genotyping_mode : group : heterozygosity : indel_heterozygosity : max_deletion_fraction : min_base_quality_score : output_mode : pair_hmm_implementation : pcr_error_rate : sample_ploidy : allSitePLs : indelGapContinuationPenalty : indelGapOpenPenalty : input_prior : max_alternate_alleles : [VariantRecalibrator] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments 
recal_file : resource : tranches_file : use_annotation : dirichlet : ignore_filter : maxGaussians : maxIterations : minNumBadVariants : mode : numKMeans : percentBadVariants : priorCounts : qualThreshold : rscript_file : shrinkage : stdThreshold : target_titv : ts_filter_level : TStranche : trustAllPolymorphic : [ApplyRecalibration] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments tranches_file : ignore_filter : mode : ts_filter_level : [RealignerTargetCreator] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : 24 pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : 
useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments maxIntervalSize : minReadsAtLocus : mismatchFraction : windowSize : [IndelRealigner] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments consensusDeterminationModel : knownAllele : LODThresholdForCleaning : nWayOut : entropyThreshold : maxConsensuses : maxIsizeForMovement : maxPositionalMoveAllowed : maxReadsForConsensuses : maxReadsForRealignment : maxReadsInMemory : noOriginalAlignmentTags : [BaseRecalibrator] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : 
remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments binary_tag_name : covariate : deletions_default_quality : indels_context_size : insertions_default_quality : low_quality_tail : lowMemoryMode : maximum_cycle_value : mismatches_context_size : mismatches_default_quality : no_standard_covs : quantizing_levels : solid_nocall_strategy : solid_recal_mode : sort_by_all_columns : [ReduceReads] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments cancer_mode : context_size : dont_compress_read_names : dont_hardclip_low_qual_tails : dont_simplify_reads : dont_use_softclipped_bases : downsample_coverage : hard_clip_to_interval : known : mindel : minimum_mapping_quality : minimum_tail_qualities : minqual : noclip_ad : out : min_pvalue : minvar : [PrintReads] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : 
maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments downsample_coverage : number : platform : readGroup : sample_file : sample_name : simplify : [UnifiedGenotyper] # inherited arguments analysis_type : baq : baqGapOpenPenalty : BQSR : defaultBaseQualities : disable_indel_quals : downsample_to_coverage : downsample_to_fraction : downsampling_type : emit_original_quals : excludeIntervals : gatk_key : globalQScorePrior : interval_merging : interval_padding : interval_set_rule : intervals : keep_program_records : log_to_file : logging_level : maxRuntime : maxRuntimeUnits : monitorThreadEfficiency : nonDeterministicRandomSeed : num_bam_file_handles : num_threads : pedigree : pedigreeString : pedigreeValidationType : performanceLog : phone_home : preserve_qscores_less_than : read_buffer_size : read_filter : read_group_black_list : reference_sequence : remove_program_records : tag : unsafe : useOriginalQualities : validation_strictness : allowBqsrOnReducedBams : # specific arguments alleles : comp : dbsnp : annotation : contamination_fraction_per_sample_file : min_indel_fraction_per_sample : excludeAnnotation : genotype_likelihoods_model : genotyping_mode : group : heterozygosity : indel_heterozygosity : max_deletion_fraction : min_base_quality_score : min_indel_count_for_genotyping : output_mode : pair_hmm_implementation : pcr_error_rate : sample_ploidy : standard_min_confidence_threshold_for_calling : standard_min_confidence_threshold_for_emitting : annotateNDA : computeSLOD : indelGapContinuationPenalty : indelGapOpenPenalty : input_prior : max_alternate_alleles : onlyEmitSamples : 
allSitePLs :