From 72f4cf9c0ecbd8608d207ee59a5e5bd36a395be8 Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Fri, 15 Jul 2011 17:44:31 -0400 Subject: [PATCH 2/8] Walker to perform deterministic annotation of phasing by transmission (to be compatible with RBP's definition of consecutive pairwise phasing) --- .../sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 1d9616aac..fbe6e5b5a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -242,7 +242,7 @@ public class ReadBackedPhasingWalker extends RodWalker KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet(Arrays.asList("PQ")); + private static final Set KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet(Arrays.asList(PQ_KEY)); private VariantContext reduceVCToSamples(VariantContext vc, List samplesToPhase) { // for ( String sample : samplesToPhase ) From 1e4798d5d0303df6b9cd42e9f7f2d30956004668 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 15 Jul 2011 19:34:14 -0400 Subject: [PATCH 5/8] Added targets to build.xml to re-run only tests that have failed (integration, unit, performance or pipeline tests). This is very useful and easy. After running any test (for example ant integrationtest) and seeing failures, you can rerun ONLY THE TESTS THAT FAILED by using one of the following commands: ant failed-integration ant failed-pipeline ant failed-test ant failed-performance obviously matching whatever tests you were running and got failures on. This should run only the failed tests, and you can keep using this command until you have fixed everything. (Thanks to David Roazen for major help with ANT) --- build.xml | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/build.xml b/build.xml index 986d89213..91eb1f8e9 100644 --- a/build.xml +++ b/build.xml @@ -785,6 +785,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -819,6 +863,22 @@ + + + + + + + + + + + + + + + + From fd1df31ef0ab60ec735ff5692132441224a02659 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 15 Jul 2011 19:39:42 -0400 Subject: [PATCH 6/8] changing the output directory names for Analyze Covariates --- .../sting/queue/qscripts/RecalibrateBaseQualities.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index 56ca36925..fca420816 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -42,8 +42,8 @@ class RecalibrateBaseQualities extends QScript { val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv") val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv") val recalBam: File = swapExt(bam, ".bam", ".recal.bam") - val path1: String = bam + "before" - val path2: String = bam + "after" + val path1: String = bam + ".before" + val path2: String = bam + ".after" add(cov(bam, recalFile1), recal(bam, recalFile1, recalBam), @@ -83,7 +83,7 @@ class RecalibrateBaseQualities extends QScript { case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { this.resources = R this.recal_file = inRecalFile - this.output_dir = outPath.toString + this.output_dir = outPath this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" } From ed55182a4c5553eb517569567e5193a12eb7e68a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sat, 16 Jul 2011 00:09:00 -0400 Subject: [PATCH 7/8] Removing Broad specific paths from parameters and making them required. This should make it unambiguous for people inside and outside the Broad to use the DataProcessingPipeline (as per request in the GetSatisfaction) --- .../qscripts/DataProcessingPipeline.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 050604b4e..4d4499990 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -3,10 +3,11 @@ package org.broadinstitute.sting.queue.qscripts import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.function.ListWriterFunction +import org.broadinstitute.sting.queue.extensions.picard._ +import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel +import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode import collection.JavaConversions._ -import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel -import org.broadinstitute.sting.queue.extensions.picard._ import net.sf.samtools.SAMFileReader import net.sf.samtools.SAMFileHeader.SortOrder @@ -29,6 +30,11 @@ class DataProcessingPipeline extends QScript { @Input(doc="Reference fasta file", fullName="reference", shortName="R", required=true) var reference: File = _ + @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true) + var dbSNP: File = _ + + @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=true) + var indels: File = _ /**************************************************************************** @@ -42,12 +48,6 @@ class DataProcessingPipeline extends QScript { @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ - @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=false) - var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") - - @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) - var indels: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf") - @Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false) var projectName: String = "project" @@ -295,7 +295,7 @@ class DataProcessingPipeline extends QScript { this.targetIntervals = tIntervals this.out = outBam this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - this.rodBind :+= RodBind("indels", "VCF", qscript.indels) + this.rodBind :+= RodBind("indels", "VCF", indels) this.consensusDeterminationModel = consensusDeterminationModel this.compress = 0 this.scatterCount = nContigs @@ -318,7 +318,7 @@ class DataProcessingPipeline extends QScript { case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { this.input_file :+= inBam this.recal_file = inRecalFile - this.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY + this.baq = CalculationMode.CALCULATE_AS_NECESSARY this.out = outBam if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString) else if (qscript.intervals != null) this.intervals :+= qscript.intervals From dd92a14b40b6270cf093b38830aab3987b8d0d25 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sat, 16 Jul 2011 00:23:35 -0400 Subject: [PATCH 8/8] Made extra indel VCF optional but DBSNP mandatory. --- .../queue/qscripts/DataProcessingPipeline.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 4d4499990..d55b86d69 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -33,10 +33,6 @@ class DataProcessingPipeline extends QScript { @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true) var dbSNP: File = _ - @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=true) - var indels: File = _ - - /**************************************************************************** * Optional Parameters ****************************************************************************/ @@ -45,6 +41,10 @@ class DataProcessingPipeline extends QScript { // @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false) // var sortSamJar: File = _ // + + @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) + var indels: File = _ + @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ @@ -284,7 +284,8 @@ class DataProcessingPipeline extends QScript { this.out = outIntervals this.mismatchFraction = 0.0 this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - this.rodBind :+= RodBind("indels", "VCF", indels) + if (!indels.isEmpty) + this.rodBind :+= RodBind("indels", "VCF", indels) this.scatterCount = nContigs this.analysisName = queueLogDir + outIntervals + ".target" this.jobName = queueLogDir + outIntervals + ".target" @@ -295,7 +296,8 @@ class DataProcessingPipeline extends QScript { this.targetIntervals = tIntervals this.out = outBam this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - this.rodBind :+= RodBind("indels", "VCF", indels) + if (!indels.isEmpty) + this.rodBind :+= RodBind("indels", "VCF", indels) this.consensusDeterminationModel = consensusDeterminationModel this.compress = 0 this.scatterCount = nContigs