From 506c0e9c9773135a46a3ad3740f1fb2e875eee83 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 23 Dec 2011 16:44:28 -0500 Subject: [PATCH 1/5] Disabling SnpEff support in the GATK and SnpEff annotation in the HybridSelectionPipeline SnpEff support will remain disabled until SnpEff 2.0.4 has been officially released and we've verified the quality of its annotations. --- .../broadinstitute/sting/gatk/walkers/annotator/SnpEff.java | 6 ++++++ .../walkers/annotator/VariantAnnotatorIntegrationTest.java | 4 ++-- .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 2 +- .../walkers/varianteval/VariantEvalIntegrationTest.java | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 1956dac6c..5d215603a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -204,6 +204,11 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio } public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { + throw new UserException("SnpEff support is currently disabled in the GATK until SnpEff 2.0.4 is officially released " + + "due to a serious issue with SnpEff versions prior to 2.0.4. Please see this page for more details: " + + "http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator"); + + /* // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff // without providing a SnpEff rod via --snpEffFile): validateRodBinding(walker.getSnpEffRodBinding()); @@ -223,6 +228,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio // mistaken in the future for a SnpEff output file: headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); + */ } public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 83c3d3a1e..fa0b62cfd 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -145,7 +145,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { } } - @Test + @Test(enabled = false) public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + @@ -157,7 +157,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing SnpEff annotations", spec); } - @Test + @Test(enabled = false) public void testSnpEffAnnotationsUnsupportedVersion() { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index ba33108cf..3c6131d6c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -298,7 +298,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test MultiSample Phase1 indels with complicated records", spec4); } - @Test + @Test(enabled = false) public void testSnpEffAnnotationRequestedWithoutRodBinding() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 4e3d38c4f..3ef4e5e9f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -14,7 +14,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; - @Test + @Test(enabled = false) public void testFunctionClassWithSnpeff() { WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( From 35c41409a118eaf6405ca3dbfe199aeb8b3b2b96 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 23 Dec 2011 19:35:00 -0500 Subject: [PATCH 2/5] Better contracts and docs for the ReadClipper * Described the ReadClipper contract in the top of the class * Added contracts where applicable * Added descriptive information to all tools in the read clipper * Organized public members and static methods together with the same javadoc --- .../sting/utils/clipping/ReadClipper.java | 277 +++++++++++++----- 1 file changed, 202 insertions(+), 75 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index d82472bae..5deee122f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -11,17 +11,53 @@ import java.util.ArrayList; import java.util.List; /** - * A simple collection of the clipping operations to apply to a read along with its read + * A comprehensive clipping tool. + * + * General Contract: + * - All clipping operations return a new read with the clipped bases requested, it never modifies the original read. + * - If a read is fully clipped, return an empty GATKSAMRecord, never null. + * - When hard clipping, add cigar operator H for every *reference base* removed (i.e. Matches, SoftClips and Deletions, but *not* insertions). See Hard Clipping notes for details. + * + * + * There are several types of clipping to use: + * + * Write N's: + * Change the bases to N's in the desired region. This can be applied anywhere in the read. + * + * Write Q0's: + * Change the quality of the bases in the desired region to Q0. This can be applied anywhere in the read. + * + * Write both N's and Q0's: + * Same as the two independent operations, put together. + * + * Soft Clipping: + * Do not change the read, just mark the reads as soft clipped in the Cigar String + * and adjust the alignment start and end of the read. + * + * Hard Clipping: + * Creates a new read without the hard clipped bases (and base qualities). The cigar string + * will be updated with the cigar operator H for every reference base removed (i.e. Matches, + * Soft clipped bases and deletions, but *not* insertions). This contract with the cigar + * is necessary to allow read.getUnclippedStart() / End() to recover the original alignment + * of the read (before clipping). + * */ public class ReadClipper { - GATKSAMRecord read; + final GATKSAMRecord read; boolean wasClipped; List ops = null; /** - * We didn't do any clipping work on this read, just leave everything as a default + * Initializes a ReadClipper object. * - * @param read + * You can set up your clipping operations using the addOp method. When you're ready to + * generate a new read with all the clipping operations, use clipRead(). + * + * Note: Use this if you want to set up multiple operations on the read using the ClippingOp + * class. If you just want to apply one of the typical modes of clipping, use the static + * clipping functions available in this class instead. + * + * @param read the read to clip */ public ReadClipper(final GATKSAMRecord read) { this.read = read; @@ -29,32 +65,55 @@ public class ReadClipper { } /** - * Add another clipping operation to apply to this read + * Add clipping operation to the read. * - * @param op + * You can add as many operations as necessary to this read before clipping. Beware that the + * order in which you add these operations matter. For example, if you hard clip the beginning + * of a read first then try to hard clip the end, the indices will have changed. Make sure you + * know what you're doing, otherwise just use the static functions below that take care of the + * ordering for you. + * + * Note: You only choose the clipping mode when you use clipRead() + * + * @param op a ClippingOp object describing the area you want to clip. */ public void addOp(ClippingOp op) { if (ops == null) ops = new ArrayList(); ops.add(op); } + /** + * Check the list of operations set up for this read. + * + * @return a list of the operations set up for this read. + */ public List getOps() { return ops; } + /** + * Check whether or not this read has been clipped. + * @return true if this read has produced a clipped read, false otherwise. + */ public boolean wasClipped() { return wasClipped; } + /** + * The original read. + * + * @return returns the read to be clipped (original) + */ public GATKSAMRecord getRead() { return read; } /** - * Return a new read corresponding to this.read that's been clipped according to ops, if any are present. + * Creates a new read that's been clipped according to ops and the chosen algorithm. + * The original read is unmodified. * - * @param algorithm - * @return + * @param algorithm What mode of clipping do you want to apply for the stacked operations. + * @return a new read with the clipping applied. */ public GATKSAMRecord clipRead(ClippingRepresentation algorithm) { if (ops == null) @@ -84,41 +143,47 @@ public class ReadClipper { } - - - // QUICK USE UTILITY FUNCTION - + /** + * Hard clips the left tail of a read up to (and including) refStop using reference + * coordinates. + * + * @param refStop the last base to be hard clipped in the left tail of the read. + * @return a new read, without the left tail. + */ + @Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) { return hardClipByReferenceCoordinates(-1, refStop); } + public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) { + return (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, refStop); + } + + + /** + * Hard clips the right tail of a read starting at (and including) refStart using reference + * coordinates. + * + * @param refStart refStop the first base to be hard clipped in the right tail of the read. + * @return a new read, without the right tail. + */ + @Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip public GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) { return hardClipByReferenceCoordinates(refStart, -1); } - - @Requires("!read.getReadUnmappedFlag()") - protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { - int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); - int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); - - if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); - - if (start < 0 || stop > read.getReadLength() - 1) - throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); - - if ( start > stop ) - throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); - - if ( start > 0 && stop < read.getReadLength() - 1) - throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); - - this.addOp(new ClippingOp(start, stop)); - GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); - this.ops = null; - return clippedRead; + public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) { + return (new ReadClipper(read)).hardClipByReferenceCoordinates(refStart, -1); } + /** + * Hard clips a read using read coordinates. + * + * @param start the first base to clip (inclusive) + * @param stop the last base to clip (inclusive) + * @return a new read, without the clipped bases + */ + @Requires({"start >= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read + "start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) return new GATKSAMRecord(read.getHeader()); @@ -126,8 +191,23 @@ public class ReadClipper { this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) { + return (new ReadClipper(read)).hardClipByReadCoordinates(start, stop); + } - @Requires("left <= right") + + /** + * Hard clips both tails of a read. + * Left tail goes from the beginning to the 'left' coordinate (inclusive) + * Right tail goes from the 'right' coordinate (inclusive) until the end of the read + * + * @param left the coordinate of the last base to be clipped in the left tail (inclusive) + * @param right the coordinate of the first base to be clipped in the right tail (inclusive) + * @return a new read, without the clipped bases + */ + @Requires({"left <= right", // tails cannot overlap + "left >= read.getAlignmentStart()", // coordinate has to be within the mapped read + "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) return new GATKSAMRecord(read.getHeader()); @@ -141,7 +221,20 @@ public class ReadClipper { ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); } + public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) { + return (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(left, right); + } + + /** + * Hard clips any contiguous tail (left, right or both) with base quality lower than lowQual. + * + * This function will look for low quality tails and hard clip them away. A low quality tail + * ends when a base has base quality greater than lowQual. + * + * @param lowQual every base quality lower than or equal to this in the tail of the read will be hard clipped + * @return a new read without low quality tails + */ public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { if (read.isEmpty()) return read; @@ -154,7 +247,7 @@ public class ReadClipper { while (rightClipIndex >= 0 && quals[rightClipIndex] <= lowQual) rightClipIndex--; while (leftClipIndex < read.getReadLength() && quals[leftClipIndex] <= lowQual) leftClipIndex++; - // if the entire read should be clipped, then return an empty read. (--todo: maybe null is better? testing this for now) + // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) return (new GATKSAMRecord(read.getHeader())); @@ -166,7 +259,16 @@ public class ReadClipper { } return this.clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) { + return (new ReadClipper(read)).hardClipLowQualEnds(lowQual); + } + + /** + * Will hard clip every soft clipped bases in the read. + * + * @return a new read without the soft clipped bases + */ public GATKSAMRecord hardClipSoftClippedBases () { if (read.isEmpty()) return read; @@ -200,7 +302,18 @@ public class ReadClipper { return clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public static GATKSAMRecord hardClipSoftClippedBases (GATKSAMRecord read) { + return (new ReadClipper(read)).hardClipSoftClippedBases(); + } + + /** + * Checks if a read contains adaptor sequences. If it does, hard clips them out. + * + * Note: To see how a read is checked for adaptor sequence see ReadUtils.getAdaptorBoundary() + * + * @return a new read without adaptor sequence + */ public GATKSAMRecord hardClipAdaptorSequence () { final Integer adaptorBoundary = ReadUtils.getAdaptorBoundary(read); @@ -209,7 +322,16 @@ public class ReadClipper { return read.getReadNegativeStrandFlag() ? hardClipByReferenceCoordinatesLeftTail(adaptorBoundary) : hardClipByReferenceCoordinatesRightTail(adaptorBoundary); } + public static GATKSAMRecord hardClipAdaptorSequence (GATKSAMRecord read) { + return (new ReadClipper(read)).hardClipAdaptorSequence(); + } + + /** + * Hard clips any leading insertions in the read. Only looks at the beginning of the read, not the end. + * + * @return a new read without leading insertions + */ public GATKSAMRecord hardClipLeadingInsertions() { if (read.isEmpty()) return read; @@ -225,49 +347,54 @@ public class ReadClipper { } return clipRead(ClippingRepresentation.HARDCLIP_BASES); } - - public GATKSAMRecord revertSoftClippedBases() { - this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates - return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); - } - - - - // STATIC VERSIONS OF THE QUICK CLIPPING FUNCTIONS - - public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) { - return (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, refStop); - } - - public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) { - return (new ReadClipper(read)).hardClipByReferenceCoordinates(refStart, -1); - } - - public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) { - return (new ReadClipper(read)).hardClipByReadCoordinates(start, stop); - } - - public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) { - return (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(left, right); - } - - public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) { - return (new ReadClipper(read)).hardClipLowQualEnds(lowQual); - } - - public static GATKSAMRecord hardClipSoftClippedBases (GATKSAMRecord read) { - return (new ReadClipper(read)).hardClipSoftClippedBases(); - } - - public static GATKSAMRecord hardClipAdaptorSequence (GATKSAMRecord read) { - return (new ReadClipper(read)).hardClipAdaptorSequence(); - } - public static GATKSAMRecord hardClipLeadingInsertions(GATKSAMRecord read) { return (new ReadClipper(read)).hardClipLeadingInsertions(); } + + /** + * Turns soft clipped bases into matches + * + * @return a new read with every soft clip turned into a match + */ + public GATKSAMRecord revertSoftClippedBases() { + this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates + return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); + } public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) { return (new ReadClipper(read)).revertSoftClippedBases(); } + + /** + * Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail + * and hardClipByReferenceCoordinatesRightTail. Should not be used directly. + * + * @param refStart first base to clip (inclusive) + * @param refStop last base to clip (inclusive) + * @return a new read, without the clipped bases + */ + @Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip + protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { + int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); + int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); + + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + + if (start < 0 || stop > read.getReadLength() - 1) + throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); + + if ( start > stop ) + throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + + if ( start > 0 && stop < read.getReadLength() - 1) + throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); + + this.addOp(new ClippingOp(start, stop)); + GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); + this.ops = null; + return clippedRead; + } + + }