From 66b5646f958caa2ea9e72df85f2bce4ca315e08a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 11 Oct 2011 13:56:00 -0400 Subject: [PATCH 01/27] Adding hidden options to the DPP controlling the default platform parameter to Count Covariates and the number of scatter gather jobs to generate are now available under hidden parameters --- .../qscripts/DataProcessingPipeline.scala | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index f97ce4884..cb6bab901 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -12,6 +12,7 @@ import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils import org.broadinstitute.sting.queue.function.ListWriterFunction +import org.broadinstitute.sting.commandline.Hidden class DataProcessingPipeline extends QScript { qscript => @@ -71,12 +72,24 @@ class DataProcessingPipeline extends QScript { var noValidation: Boolean = false + /**************************************************************************** + * Hidden Parameters + ****************************************************************************/ + @Hidden + @Input(doc="How many ways to scatter/gather", fullName="scatter_gather", shortName="sg", required=false) + var nContigs: Int = -1 + + @Hidden + @Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) + var defaultPlatform: String = _ + + /**************************************************************************** * Global Variables ****************************************************************************/ val queueLogDir: String = ".qlog/" // Gracefully hide Queue's output - var nContigs: Int = 0 // Use the number of contigs for scatter gathering jobs + var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS @@ -210,7 +223,8 @@ class DataProcessingPipeline extends QScript { // keep a record of the number of contigs in the first bam file in the list val bams = QScriptUtils.createListFromFile(input) - nContigs = QScriptUtils.getNumberOfContigs(bams(0)) + if (nContigs < 0) + nContigs = QScriptUtils.getNumberOfContigs(bams(0)) val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)} @@ -325,6 +339,7 @@ class DataProcessingPipeline extends QScript { this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.input_file :+= inBam this.recal_file = outRecalFile + if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString) else if (qscript.intervals != null) this.intervals :+= qscript.intervals this.scatterCount = nContigs From 0939d16a8de200b8f80b047b4b0cb0e67ce5af1b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 13 Oct 2011 13:22:05 -0400 Subject: [PATCH 03/27] String not empty bug Apparently var X: String = _ is not the same as var X: String = "". :( --- .../sting/queue/qscripts/DataProcessingPipeline.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index cb6bab901..968ca9c26 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -81,7 +81,7 @@ class DataProcessingPipeline extends QScript { @Hidden @Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) - var defaultPlatform: String = _ + var defaultPlatform: String = "" /**************************************************************************** From e12ffb654730e3553449302b395661026c117ad9 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 13 Oct 2011 13:27:00 -0400 Subject: [PATCH 04/27] Updating docs for GCContentByInterval This walker does not take any BAMs. It only walks over the reference. --- .../gatk/walkers/coverage/GCContentByIntervalWalker.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java index 5c2a967b9..17b17764b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java @@ -42,12 +42,12 @@ import java.util.List; * *

Input

*

- * One or more BAM files. + * A reference file *

* *

Output

*

- * GC content calculations per interval. + * GC content calculations per interval. *

* *

Examples

@@ -56,7 +56,6 @@ import java.util.List; * -R ref.fasta \ * -T GCContentByInterval \ * -o output.txt \ - * -I input.bam \ * -L input.intervals * * From fd4540cd323e85396a4ba5d2bb2b8ee52f88e766 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Oct 2011 13:37:55 -0400 Subject: [PATCH 05/27] Fixed extraordinarily subtle race condition with contracts invariant -- all of the methods in the class must be synchronized or the internal state can be inconsistent with the contract invariant when entering the class in a non-synchronized method, even when that method doesn't care about the object's internal state --- .../org/broadinstitute/sting/utils/SimpleTimer.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java index 6d228640f..a5ac10250 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java +++ b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java @@ -46,7 +46,7 @@ public class SimpleTimer { * @return the name associated with this timer */ @Ensures("result != null") - public String getName() { + public synchronized String getName() { return name; } @@ -82,14 +82,14 @@ public class SimpleTimer { /** * @return is this timer running? */ - public boolean isRunning() { + public synchronized boolean isRunning() { return running; } /** * @return A convenience function to obtain the current time in milliseconds from this timer */ - public long currentTime() { + public synchronized long currentTime() { return System.currentTimeMillis(); } @@ -119,8 +119,4 @@ public class SimpleTimer { public synchronized double getElapsedTime() { return (running ? (currentTime() - startTime + elapsed) : elapsed) / 1000.0; } - - public void printElapsedTime(PrintStream out) { - out.printf("SimpleTimer %s: %.2f%n", name, getElapsedTime()); - } } From 4108a294f76e051f9c14cc8884e49a90a46411c9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Oct 2011 13:58:46 -0400 Subject: [PATCH 06/27] Better error message when a RodBinding file doesn't exist --- .../sting/commandline/ArgumentTypeDescriptor.java | 2 +- .../broadinstitute/sting/utils/exceptions/UserException.java | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 5fff8f609..94157963f 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -380,7 +380,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { if ( tribbleType == null ) if ( ! file.canRead() | ! file.isFile() ) { - throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file); + throw new UserException.CouldNotReadInputFile(file, "file does exist or couldn't be read"); } else { throw new UserException.CommandLineException( String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 274c64f42..9d131ae0c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -111,6 +111,10 @@ public class UserException extends ReviewedStingException { super(String.format("Couldn't read file because %s caused by %s", message, e.getMessage())); } + public CouldNotReadInputFile(File file) { + super(String.format("Couldn't read file %s", file.getAbsolutePath())); + } + public CouldNotReadInputFile(File file, String message) { super(String.format("Couldn't read file %s because %s", file.getAbsolutePath(), message)); } From 6b02354d8467d8435d4077195ac0b10a4a1ef3b7 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 17 Oct 2011 14:34:52 -0400 Subject: [PATCH 07/27] Adding a new getter in VariantsToTable to extract the indel event length. --- .../gatk/walkers/variantutils/VariantsToTable.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 81d0c36ac..7b0b81d2a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -294,6 +295,14 @@ public class VariantsToTable extends RodWalker { return x.toString(); } }); + getters.put("EVENTLENGTH", new Getter() { public String get(VariantContext vc) { + int maxLength = 0; + for ( final Allele a : vc.getAlternateAlleles() ) { + final int length = a.length() - vc.getReference().length(); + if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; } + } + return Integer.toString(maxLength); + }}); getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } }); getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) { if ( vc.isSNP() && vc.isBiallelic() ) @@ -304,7 +313,6 @@ public class VariantsToTable extends RodWalker { getters.put("FILTER", new Getter() { public String get(VariantContext vc) { return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); } }); - getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } }); getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } }); getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } }); From a7cf9cdc67e14a9298e013df42beda06caba7595 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Oct 2011 15:25:35 -0400 Subject: [PATCH 08/27] Fixing error message typo --- .../sting/commandline/ArgumentTypeDescriptor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 94157963f..14275c57b 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -380,7 +380,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { if ( tribbleType == null ) if ( ! file.canRead() | ! file.isFile() ) { - throw new UserException.CouldNotReadInputFile(file, "file does exist or couldn't be read"); + throw new UserException.CouldNotReadInputFile(file, "file does't exist or couldn't be read"); } else { throw new UserException.CommandLineException( String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + From ec911ce5bb8c3873dbfd0a0eb409aaed5d6a82a6 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Oct 2011 15:27:22 -0400 Subject: [PATCH 09/27] Even better error messages --- .../sting/commandline/ArgumentTypeDescriptor.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 14275c57b..b54e7c7b1 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -379,8 +379,10 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { } if ( tribbleType == null ) - if ( ! file.canRead() | ! file.isFile() ) { - throw new UserException.CouldNotReadInputFile(file, "file does't exist or couldn't be read"); + if ( ! file.exists() ) { + throw new UserException.CouldNotReadInputFile(file, "file does not exist"); + } else if ( ! file.canRead() | ! file.isFile() ) { + throw new UserException.CouldNotReadInputFile(file, "file could not be read"); } else { throw new UserException.CommandLineException( String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + From c1329c4ddee4ce4330ff5046003d9e389f0c6f43 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 17 Oct 2011 15:29:45 -0400 Subject: [PATCH 10/27] Fixing a binary to logical or --- .../sting/commandline/ArgumentTypeDescriptor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index b54e7c7b1..d1d9cf7fe 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -381,7 +381,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { if ( tribbleType == null ) if ( ! file.exists() ) { throw new UserException.CouldNotReadInputFile(file, "file does not exist"); - } else if ( ! file.canRead() | ! file.isFile() ) { + } else if ( ! file.canRead() || ! file.isFile() ) { throw new UserException.CouldNotReadInputFile(file, "file could not be read"); } else { throw new UserException.CommandLineException( From 1e6794c53985f02b27d0f7418c0d1784b28ad754 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 17 Oct 2011 15:56:02 -0400 Subject: [PATCH 12/27] fixing typo in VariantsToTable docs --- .../sting/gatk/walkers/variantutils/VariantsToTable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 7b0b81d2a..bf16b2dfb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -134,7 +134,7 @@ public class VariantsToTable extends RodWalker { /** * By default, this tool throws a UserException when it encounters a field without a value in some record. This - * is generally useful when you mistype -F CHROM, so that you get a friendly warning about CHRMO not being + * is generally useful when you mistype -F CHROM, so that you get a friendly warning about CHROM not being * found before the tool runs through 40M 1000G records. However, in some cases you genuinely want to allow such * fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument * will cause VariantsToTable to write out NA values for missing fields instead of throwing an error. From 1a92ee3593417941a31b32e60cb34796d9bd5d95 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 18 Oct 2011 10:57:02 -0400 Subject: [PATCH 14/27] No longer adds a binding of ID -> . when the ID field is dot in the VCF -- Really we should make ID a primary key in VariantContext. Putting it into the attributes is just annoying now --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index c86b91b79..6b101ca74 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -381,7 +381,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, } } - attributes.put(VariantContext.ID_KEY, id); + if ( ! id.equals(VCFConstants.EMPTY_ID_FIELD) ) + attributes.put(VariantContext.ID_KEY, id); return attributes; } From f77f2eeb7d1abf58766f430974f0c7c395740f0e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 18 Oct 2011 13:04:43 -0400 Subject: [PATCH 15/27] Fix for new ID structure --- .../sting/gatk/walkers/variantutils/VariantsToTable.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index bf16b2dfb..5dbd3f5fd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -313,6 +313,7 @@ public class VariantsToTable extends RodWalker { getters.put("FILTER", new Getter() { public String get(VariantContext vc) { return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); } }); + getters.put("ID", new Getter() { public String get(VariantContext vc) { return vc.hasID() ? vc.getID() : "."; } }); getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } }); getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } }); getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } }); From e5fc82854609f90df8be3704e6f3cd5f482b5a91 Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Tue, 18 Oct 2011 14:47:39 -0400 Subject: [PATCH 16/27] With Khalid's implicit approval, I have removed this line that overrides the memory limit of the VCF-gathering function, so that the inherited limit remains --- .../sting/queue/extensions/gatk/VcfGatherFunction.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index d70022147..70046c913 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -36,8 +36,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction { private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] override def freezeFieldValues { - this.memoryLimit = Some(1) - this.jarFile = this.originalGATK.jarFile this.reference_sequence = this.originalGATK.reference_sequence this.intervals = this.originalGATK.intervals From 6cadaa84c977d0cecedb3871c354ecf152526fce Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 11:48:23 -0400 Subject: [PATCH 18/27] Just use validate() from super class since it does the same thing --- .../utils/variantcontext/MutableGenotype.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java index 0cd684cb6..14419a2a0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableGenotype.java @@ -40,19 +40,7 @@ public class MutableGenotype extends Genotype { */ public void setAlleles(List alleles) { this.alleles = new ArrayList(alleles); - - // todo -- add validation checking here - - if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles"); - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); - - int nNoCalls = 0; - for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); - - for ( Allele allele : alleles ) - if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype"); + validate(); } public void setPhase(boolean isPhased) { From 48c4a8cb335e9df5b336bc869a803a2f0b9e98ba Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 11:49:16 -0400 Subject: [PATCH 19/27] Make error messages clearer (even I was confused) --- .../sting/utils/variantcontext/VariantContext.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index eac6d70b5..14a680af4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1357,10 +1357,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a); } - // deal with the case where the first allele isn't the reference + // deal with the case where the first allele isn't the reference if ( a.isReference() ) { if ( sawRef ) - throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + throw new IllegalArgumentException("Alleles for a VariantContext must contain at most one reference allele: " + alleles); alleleList.add(0, a); sawRef = true; } @@ -1372,7 +1372,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list"); if ( alleleList.get(0).isNonReference() ) - throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + throw new IllegalArgumentException("Alleles for a VariantContext must contain at least one reference allele: " + alleles); return alleleList; } From 5a6468c11ea1f3e72e4079f2073d66c0bff00e99 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 11:52:05 -0400 Subject: [PATCH 20/27] Allowing ./X genotypes and adding a unit test to ensure that this case is covered from now on (especially given that we may want to revert in the future). Reverting this change is really easy and entails uncommenting a few lines of code. But for now, despite Mark's objections, this case is allowed in the VCF spec and we are wrong not to allow it. --- .../sting/utils/variantcontext/Genotype.java | 10 ++++++---- .../variantcontext/VariantContextUnitTest.java | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 7ab3f81f0..5639ef30e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -197,14 +197,16 @@ public class Genotype { if ( alleles == null ) return; if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0"); - int nNoCalls = 0; + // int nNoCalls = 0; for ( Allele allele : alleles ) { if ( allele == null ) throw new IllegalArgumentException("BUG: allele cannot be null in Genotype"); - nNoCalls += allele.isNoCall() ? 1 : 0; + // nNoCalls += allele.isNoCall() ? 1 : 0; } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); + + // Technically, the spec does allow for the below case so this is not an illegal state + //if ( nNoCalls > 0 && nNoCalls != alleles.size() ) + // throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); } public String getGenotypeString() { diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 9026f09d6..a5060e54e 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -252,6 +252,23 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(vc.getSampleNames().size(), 0); } + @Test + public void testCreatingPartiallyCalledGenotype() { + List alleles = Arrays.asList(Aref, C); + Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL), 10); + VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, alleles, Arrays.asList(g)); + + Assert.assertTrue(vc.isSNP()); + Assert.assertEquals(vc.getNAlleles(), 2); + Assert.assertTrue(vc.hasGenotypes()); + Assert.assertFalse(vc.isMonomorphic()); + Assert.assertTrue(vc.isPolymorphic()); + Assert.assertEquals(vc.getGenotype("foo"), g); + Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called + Assert.assertEquals(vc.getChromosomeCount(Aref), 0); + Assert.assertEquals(vc.getChromosomeCount(C), 1); + } + @Test (expectedExceptions = IllegalArgumentException.class) public void testBadConstructorArgs1() { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref)); From d8d73fe4f2ffef079edd495066ae499de14a919a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 19 Oct 2011 15:11:13 -0400 Subject: [PATCH 21/27] Treat ./X genotypes as MIXED so that isHet, isHom, etc. still return the expected and correct values. Added docs to these accessors with contracts explicitly mentioned. Fixed case where NPE could be thrown. --- .../sting/utils/variantcontext/Genotype.java | 61 +++++++++++++++---- .../utils/variantcontext/VariantContext.java | 11 +++- .../VariantContextUnitTest.java | 6 ++ 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 5639ef30e..e2e44e2b9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -108,14 +108,19 @@ public class Genotype { /** * @return the ploidy of this genotype */ - public int getPloidy() { return alleles.size(); } + public int getPloidy() { + if ( alleles == null ) + throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype"); + return alleles.size(); + } public enum Type { NO_CALL, HOM_REF, HET, HOM_VAR, - UNAVAILABLE + UNAVAILABLE, + MIXED // no-call and call in the same genotype } public Type getType() { @@ -129,36 +134,68 @@ public class Genotype { if ( alleles == null ) return Type.UNAVAILABLE; - Allele firstAllele = alleles.get(0); + boolean sawNoCall = false, sawMultipleAlleles = false; + Allele observedAllele = null; - if ( firstAllele.isNoCall() ) { - return Type.NO_CALL; + for ( Allele allele : alleles ) { + if ( allele.isNoCall() ) + sawNoCall = true; + else if ( observedAllele == null ) + observedAllele = allele; + else if ( !allele.equals(observedAllele) ) + sawMultipleAlleles = true; } - for (Allele a : alleles) { - if ( ! firstAllele.equals(a) ) - return Type.HET; + if ( sawNoCall ) { + if ( observedAllele == null ) + return Type.NO_CALL; + return Type.MIXED; } - return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR; + + if ( observedAllele == null ) + throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null"); + + return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR; } /** - * @return true if all observed alleles are the same (regardless of whether they are ref or alt) + * @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false. */ public boolean isHom() { return isHomRef() || isHomVar(); } + + /** + * @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false. + */ public boolean isHomRef() { return getType() == Type.HOM_REF; } + + /** + * @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false. + */ public boolean isHomVar() { return getType() == Type.HOM_VAR; } /** - * @return true if we're het (observed alleles differ) + * @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false. */ public boolean isHet() { return getType() == Type.HET; } /** - * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) + * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false. */ public boolean isNoCall() { return getType() == Type.NO_CALL; } + + /** + * @return true if this genotype is comprised of any alleles that are not no-calls (even if some are). + */ public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; } + + /** + * @return true if this genotype is comprised of both calls and no-calls. + */ + public boolean isMixed() { return getType() == Type.MIXED; } + + /** + * @return true if the type of this genotype is set. + */ public boolean isAvailable() { return getType() != Type.UNAVAILABLE; } // diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 14a680af4..f52a7087b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -998,7 +998,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati else if ( g.isHomVar() ) genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++; else - throw new IllegalStateException("Genotype of unknown type: " + g); + genotypeCounts[Genotype.Type.MIXED.ordinal()]++; } } } @@ -1042,6 +1042,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]; } + /** + * Genotype-specific functions -- how many mixed calls are there in the genotypes? + * + * @return number of mixed calls + */ + public int getMixedCount() { + return genotypeCounts[Genotype.Type.MIXED.ordinal()]; + } + // --------------------------------------------------------------------------------------------------------- // // validation: extra-strict validation routines for paranoid users diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index a5060e54e..a4d78b637 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -267,6 +267,12 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called Assert.assertEquals(vc.getChromosomeCount(Aref), 0); Assert.assertEquals(vc.getChromosomeCount(C), 1); + Assert.assertFalse(vc.getGenotype("foo").isHet()); + Assert.assertFalse(vc.getGenotype("foo").isHom()); + Assert.assertFalse(vc.getGenotype("foo").isNoCall()); + Assert.assertFalse(vc.getGenotype("foo").isHom()); + Assert.assertTrue(vc.getGenotype("foo").isMixed()); + Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED); } @Test (expectedExceptions = IllegalArgumentException.class) From cd8a6d62bba320d276455d93b73290a61d6a203c Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 19 Oct 2011 17:42:37 -0400 Subject: [PATCH 22/27] You know how the wiki has a big section on commiting local changes to BRANCHES of the repository you clone it from? Yeah. It sucks if you don't do that. This commit contains: - IntronLossGenotyper is brought into its current incarnation - A couple of simple new filters (ReadName is super useful for debugging, MateUnmapped is useful for selecting out reads that may have a relevant unaligned mate) - RFA now matches my current local repository. It's in flux since I'm transitioning to the new traversal type. + the triggering read stash pilot required me to change the scope of some of the variables in the ReadClipping code, private -> protected. Those are all the changes there. - MendelianViolation restored to its former glory (and an annotator module that uses the likelihood calculation has been added) + use this rather than a hard GQ threshold if you're doing MV analyses. - Some miscellaneous QScripts --- .../sting/gatk/filters/ReadNameFilter.java | 23 ++++++++ .../walkers/annotator/MVLikelihoodRatio.java | 58 +++++++++++++++++++ .../sting/utils/MendelianViolation.java | 42 ++++++++++++++ 3 files changed, 123 insertions(+) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java new file mode 100755 index 000000000..a56af56d1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadNameFilter.java @@ -0,0 +1,23 @@ +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/19/11 + * Time: 4:09 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadNameFilter extends ReadFilter { + @Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true) + private String readName; + + public boolean filterOut(final SAMRecord rec) { + return ! rec.getReadName().equals(readName); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java new file mode 100755 index 000000000..e0a2329f8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 9/14/11 + * Time: 12:24 PM + * To change this template use File | Settings | File Templates. + */ +public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { + + private MendelianViolation mendelianViolation = null; + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( mendelianViolation == null ) { + if ( walker instanceof VariantAnnotator ) { + mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); + } + else { + throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator"); + } + } + + Map toRet = new HashMap(1); + boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && + vc.hasGenotype(mendelianViolation.getSampleDad()) && + vc.hasGenotype(mendelianViolation.getSampleMom()); + if ( hasAppropriateGenotypes ) + toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); + + return toRet; + } + + // return the descriptions used for the VCF INFO meta field + public List getKeyNames() { return Arrays.asList("MVLR"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index e62a7e512..cf45dab79 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -26,6 +26,9 @@ public class MendelianViolation { double minGenotypeQuality; + static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; + static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; + private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); public String getSampleMom() { @@ -134,4 +137,43 @@ public class MendelianViolation { return false; return true; } + + /** + * @return the likelihood ratio for a mendelian violation + */ + public double violationLikelihoodRatio(VariantContext vc) { + double[] logLikAssignments = new double[27]; + // the matrix to set up is + // MOM DAD CHILD + // |- AA + // AA AA | AB + // |- BB + // |- AA + // AA AB | AB + // |- BB + // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs + double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + int offset = 0; + for ( int oMom = 0; oMom < 3; oMom++ ) { + for ( int oDad = 0; oDad < 3; oDad++ ) { + for ( int oChild = 0; oChild < 3; oChild ++ ) { + logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; + } + } + } + double[] mvLiks = new double[12]; + double[] nonMVLiks = new double[15]; + for ( int i = 0; i < 12; i ++ ) { + mvLiks[i] = logLikAssignments[mvOffsets[i]]; + } + + for ( int i = 0; i < 15; i++) { + nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; + } + + return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks); + } + } From ed402588cc37c0bc1b979916f4e623a7671065f4 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 20 Oct 2011 16:19:13 -0400 Subject: [PATCH 24/27] Adding the "gold standard NA12878" target --- .../qscripts/MethodsDevelopmentCallingPipeline.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala index cc4790f05..88c1bd2c8 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala @@ -98,13 +98,17 @@ class MethodsDevelopmentCallingPipeline extends QScript { // BUGBUG: We no longer support b36/hg18 because several of the necessary files aren't available aligned to those references val targetDataSets: Map[String, Target] = Map( + "NA12878_gold" -> new Target("NA12878.goldStandard", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/data/goldStandard.list"), + new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** There is no gold standard for the gold standard ** + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, lowPass, !exome, 391), "NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"), - new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"), + new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), "NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"), - new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), "NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18, "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask", @@ -142,7 +146,7 @@ class MethodsDevelopmentCallingPipeline extends QScript { "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3), "GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"), - new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.GA2.hg19.filtered.vcf"), + new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.GA2.hg19.filtered.vcf"), "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), "FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"), From d7367c152adc2f4197d7354619769d7a515f700d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 20 Oct 2011 17:01:54 -0400 Subject: [PATCH 25/27] Fixing 'revert' when not realigning RevertSam was reverting the alignment information and that was screwing up the pipeline if you didn't want to run it with BWA. Fixed. --- .../queue/qscripts/DataProcessingPipeline.scala | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index f97ce4884..59fcfea96 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -149,7 +149,7 @@ class DataProcessingPipeline extends QScript { var index = 1 for (bam <- bams) { // first revert the BAM file to the original qualities - val revertedBAM = revertBAM(bam) + val revertedBAM = revertBAM(bam, true) val readSortedBam = swapExt(revertedBAM, ".bam", "." + index + ".sorted.bam" ) val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") @@ -183,16 +183,16 @@ class DataProcessingPipeline extends QScript { ConsensusDeterminationModel.USE_READS } - def revertBams(bams: List[File]): List[File] = { + def revertBams(bams: List[File], removeAlignmentInformation: Boolean): List[File] = { var revertedBAMList: List[File] = List() for (bam <- bams) - revertedBAMList :+= revertBAM(bam) + revertedBAMList :+= revertBAM(bam, removeAlignmentInformation) return revertedBAMList } - def revertBAM(bam: File): File = { + def revertBAM(bam: File, removeAlignmentInformation: Boolean): File = { val revertedBAM = swapExt(bam, ".bam", ".reverted.bam") - add(revert(bam, revertedBAM)) + add(revert(bam, revertedBAM, removeAlignmentInformation)) return revertedBAM } @@ -212,7 +212,7 @@ class DataProcessingPipeline extends QScript { val bams = QScriptUtils.createListFromFile(input) nContigs = QScriptUtils.getNumberOfContigs(bams(0)) - val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)} + val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams, false)} // generate a BAM file per sample joining all per lane files if necessary val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs) @@ -408,9 +408,10 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".rg" } - case class revert (inBam: File, outBam: File) extends RevertSam with ExternalCommonArgs { + case class revert (inBam: File, outBam: File, removeAlignmentInfo: Boolean) extends RevertSam with ExternalCommonArgs { this.output = outBam this.input :+= inBam + this.removeAlignmentInformation = removeAlignmentInfo; this.analysisName = queueLogDir + outBam + "revert" this.jobName = queueLogDir + outBam + ".revert" From c9d8b22092b04e1ec6d05d1a3f98a109de94851b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 20 Oct 2011 18:36:28 -0400 Subject: [PATCH 26/27] Added BWASW support to the pipeline Data Processing Pipeline can now use BWASW for realigning the reads. Useful for Ion Torrent data. --- .../qscripts/DataProcessingPipeline.scala | 40 ++++++++-- .../extensions/picard/PicardBamFunction.scala | 2 +- .../queue/extensions/picard/SamToFastq.scala | 76 +++++++++++++++++++ 3 files changed, 109 insertions(+), 9 deletions(-) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 7262de863..5e5fd2365 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -65,6 +65,9 @@ class DataProcessingPipeline extends QScript { @Input(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false) var useBWApe: Boolean = false + @Input(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false) + var useBWAsw: Boolean = false + @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) var bwaThreads: Int = 1 @@ -162,22 +165,28 @@ class DataProcessingPipeline extends QScript { var index = 1 for (bam <- bams) { // first revert the BAM file to the original qualities - val revertedBAM = revertBAM(bam, true) - val readSortedBam = swapExt(revertedBAM, ".bam", "." + index + ".sorted.bam" ) val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + if (useBWAse) { + val revertedBAM = revertBAM(bam, true) add(bwa_aln_se(revertedBAM, saiFile1), bwa_sam_se(revertedBAM, saiFile1, realignedSamFile)) } - else { - add(sortSam(revertedBAM, readSortedBam, SortOrder.queryname), - bwa_aln_pe(readSortedBam, saiFile1, 1), - bwa_aln_pe(readSortedBam, saiFile2, 2), - bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile)) + else if (useBWApe) { + val revertedBAM = revertBAM(bam, true) + add(bwa_aln_pe(revertedBAM, saiFile1, 1), + bwa_aln_pe(revertedBAM, saiFile2, 2), + bwa_sam_pe(revertedBAM, saiFile1, saiFile2, realignedSamFile)) + } + else if (useBWAsw) { + val revertedBAM = revertBAM(bam, false) + val fastQ = swapExt(revertedBAM, ".bam", ".fq") + add(convertToFastQ(revertedBAM, fastQ), + bwa_sw(fastQ, realignedSamFile)) } add(sortSam(realignedSamFile, realignedBamFile, SortOrder.coordinate)) addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam)) @@ -226,7 +235,7 @@ class DataProcessingPipeline extends QScript { if (nContigs < 0) nContigs = QScriptUtils.getNumberOfContigs(bams(0)) - val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams, false)} + val realignedBAMs = if (useBWApe || useBWAse || useBWAsw) {performAlignment(bams)} else {revertBams(bams, false)} // generate a BAM file per sample joining all per lane files if necessary val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs) @@ -427,9 +436,16 @@ class DataProcessingPipeline extends QScript { this.output = outBam this.input :+= inBam this.removeAlignmentInformation = removeAlignmentInfo; + this.sortOrder = if (removeAlignmentInfo) {SortOrder.queryname} else {SortOrder.coordinate} this.analysisName = queueLogDir + outBam + "revert" this.jobName = queueLogDir + outBam + ".revert" + } + case class convertToFastQ (inBam: File, outFQ: File) extends SamToFastq with ExternalCommonArgs { + this.input :+= inBam + this.fastq = outFQ + this.analysisName = queueLogDir + outFQ + "convert_to_fastq" + this.jobName = queueLogDir + outFQ + ".convert_to_fastq" } case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @@ -469,6 +485,14 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".bwa_sam_pe" } + case class bwa_sw (inFastQ: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs { + @Input(doc="fastq file to be aligned") var fq = inFastQ + @Output(doc="output bam file") var bam = outBam + def commandLine = bwaPath + " bwasw -t " + bwaThreads + " " + reference + " " + fq + " > " + bam + this.analysisName = queueLogDir + outBam + ".bwasw" + this.jobName = queueLogDir + outBam + ".bwasw" + } + case class writeList(inBams: List[File], outBamList: File) extends ListWriterFunction { this.inputFiles = inBams this.listFile = outBamList diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala index 2654e4a3d..427c09f82 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala @@ -50,8 +50,8 @@ trait PicardBamFunction extends JavaCommandLineFunction { abstract override def commandLine = super.commandLine + Array( repeat(" INPUT=", inputBams), - " OUTPUT=" + outputBam, " TMP_DIR=" + jobTempDir, + optional(" OUTPUT=", outputBam), optional(" COMPRESSION_LEVEL=", compressionLevel), optional(" VALIDATION_STRINGENCY=", validationStringency), optional(" SO=", sortOrder), diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala new file mode 100644 index 000000000..2f9f84c63 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala @@ -0,0 +1,76 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline._ + +import java.io.File + +/* + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 6/22/11 + * Time: 10:35 AM + */ +class SamToFastq extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "SamToFastq" + javaMainClass = "net.sf.picard.sam.SamToFastq" + + @Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "Input SAM/BAM file to extract reads from.") + var input: List[File] = Nil + + @Output(shortName = "fastq", fullName = "output_fastq_file", required = true, doc = "Output fastq file (single-end fastq or, if paired, first end of the pair fastq).") + var fastq: File = _ + + @Output(shortName = "se", fullName = "second_end_fastq", required = false, doc = "Output fastq file (if paired, second end of the pair fastq).") + var secondEndFastQ: File = _ + + @Argument(shortName = "opg", fullName = "output_per_readgroup", required = false, doc = "Output a fastq file per read group (two fastq files per read group if the group is paired).") + var outputPerReadGroup: Boolean = false + + @Argument(shortName = "od", fullName = "output_dir", required = false, doc = "Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.") + var outputDir: File = _ + + @Argument(shortName = "rr", fullName = "re_reverse", required = false, doc = "Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq.") + var reReverse: Boolean = true + + @Argument(shortName = "nonpf", fullName = "include_non_pf_reads", required = false, doc = "Include non-PF reads from the SAM file into the output FASTQ files.") + var includeNonPFReads: Boolean = false + + @Argument(shortName = "cat", fullName = "clipping_attribute", required = false, doc = "The attribute that stores the position at which the SAM record should be clipped.") + var clippingAttribute: String = null + + @Argument(shortName = "cac", fullName = "clipping_action", required = false, doc = "The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region.") + var clippingAction: String = null + + @Argument(shortName = "r1t", fullName = "read_one_trim", required = false, doc = "The number of bases to trim from the beginning of read 1.") + var readOneTrim: Int = -1 + + @Argument(shortName = "r1mbtw", fullName = "read_one_max_bases_to_write", required = false, doc = "The maximum number of bases to write from read 1 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written.") + var readOneMaxBasesToWrite: Int = -1 + + @Argument(shortName = "r2t", fullName = "read_two_trim", required = false, doc = "The number of bases to trim from the beginning of read 2.") + var readTwoTrim: Int = -1 + + @Argument(shortName = "r2mbtw", fullName = "read_two_max_bases_to_write", required = false, doc = "The maximum number of bases to write from read 2 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written.") + var readTwoMaxBasesToWrite: Int = -1 + + @Argument(shortName = "inpa", fullName = "include_non_primary_alignments", required = false, doc = "If true, include non-primary alignments in the output. Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments.") + var includeNonPrimaryAlignments: Boolean = false + + override def inputBams = input + override def outputBam = null + + override def commandLine = super.commandLine + + " FASTQ=" + fastq + + optional(" SECOND_END_FASTQ=", secondEndFastQ) + + conditionalParameter(outputPerReadGroup, optional(" OUTPUT_PER_RG=", outputPerReadGroup)) + + optional(" OUTPUT_DIR=", outputDir) + + conditionalParameter(!reReverse, optional(" RE_REVERSE=", reReverse)) + + conditionalParameter(includeNonPFReads, optional(" INCLUDE_NON_PF_READS=", includeNonPFReads)) + + optional(" CLIPPING_ATTRIBUTE=", clippingAttribute) + + optional(" CLIPPING_ACTION=", clippingAction) + + conditionalParameter (readOneTrim >= 0, optional(" READ1_TRIM=", readOneTrim)) + + conditionalParameter (readOneMaxBasesToWrite >= 0, optional(" READ1_MAX_BASES_TO_WRITE=", readOneMaxBasesToWrite)) + + conditionalParameter (readTwoTrim >= 0, optional(" READ2_TRIM=", readTwoTrim)) + + conditionalParameter (readTwoMaxBasesToWrite >=0, optional(" READ2_MAX_BASES_TO_WRITE=", readTwoMaxBasesToWrite)) + + conditionalParameter (includeNonPrimaryAlignments, optional(" INCLUDE_NON_PRIMARY_ALIGNMENTS=", includeNonPrimaryAlignments)) +} \ No newline at end of file From 9f867d77ca746f3063f8c626826bf899dc632e85 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 20 Oct 2011 18:44:09 -0400 Subject: [PATCH 27/27] no sort order subtle bug fixed. --- .../sting/queue/extensions/picard/SamToFastq.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala index 2f9f84c63..3a4217e60 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala @@ -58,6 +58,7 @@ class SamToFastq extends org.broadinstitute.sting.queue.function.JavaCommandLine override def inputBams = input override def outputBam = null + this.sortOrder = null override def commandLine = super.commandLine + " FASTQ=" + fastq +