diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 1956dac6c..5d215603a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -204,6 +204,11 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio } public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { + throw new UserException("SnpEff support is currently disabled in the GATK until SnpEff 2.0.4 is officially released " + + "due to a serious issue with SnpEff versions prior to 2.0.4. Please see this page for more details: " + + "http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator"); + + /* // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff // without providing a SnpEff rod via --snpEffFile): validateRodBinding(walker.getSnpEffRodBinding()); @@ -223,6 +228,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio // mistaken in the future for a SnpEff output file: headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); + */ } public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index d82472bae..5deee122f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -11,17 +11,53 @@ import java.util.ArrayList; import java.util.List; /** - * A simple collection of the clipping operations to apply to a read along with its read + * A comprehensive clipping tool. + * + * General Contract: + * - All clipping operations return a new read with the clipped bases requested, it never modifies the original read. + * - If a read is fully clipped, return an empty GATKSAMRecord, never null. + * - When hard clipping, add cigar operator H for every *reference base* removed (i.e. Matches, SoftClips and Deletions, but *not* insertions). See Hard Clipping notes for details. + * + * + * There are several types of clipping to use: + * + * Write N's: + * Change the bases to N's in the desired region. This can be applied anywhere in the read. + * + * Write Q0's: + * Change the quality of the bases in the desired region to Q0. This can be applied anywhere in the read. + * + * Write both N's and Q0's: + * Same as the two independent operations, put together. + * + * Soft Clipping: + * Do not change the read, just mark the reads as soft clipped in the Cigar String + * and adjust the alignment start and end of the read. + * + * Hard Clipping: + * Creates a new read without the hard clipped bases (and base qualities). The cigar string + * will be updated with the cigar operator H for every reference base removed (i.e. Matches, + * Soft clipped bases and deletions, but *not* insertions). This contract with the cigar + * is necessary to allow read.getUnclippedStart() / End() to recover the original alignment + * of the read (before clipping). + * */ public class ReadClipper { - GATKSAMRecord read; + final GATKSAMRecord read; boolean wasClipped; List ops = null; /** - * We didn't do any clipping work on this read, just leave everything as a default + * Initializes a ReadClipper object. * - * @param read + * You can set up your clipping operations using the addOp method. When you're ready to + * generate a new read with all the clipping operations, use clipRead(). + * + * Note: Use this if you want to set up multiple operations on the read using the ClippingOp + * class. If you just want to apply one of the typical modes of clipping, use the static + * clipping functions available in this class instead. + * + * @param read the read to clip */ public ReadClipper(final GATKSAMRecord read) { this.read = read; @@ -29,32 +65,55 @@ public class ReadClipper { } /** - * Add another clipping operation to apply to this read + * Add clipping operation to the read. * - * @param op + * You can add as many operations as necessary to this read before clipping. Beware that the + * order in which you add these operations matter. For example, if you hard clip the beginning + * of a read first then try to hard clip the end, the indices will have changed. Make sure you + * know what you're doing, otherwise just use the static functions below that take care of the + * ordering for you. + * + * Note: You only choose the clipping mode when you use clipRead() + * + * @param op a ClippingOp object describing the area you want to clip. */ public void addOp(ClippingOp op) { if (ops == null) ops = new ArrayList(); ops.add(op); } + /** + * Check the list of operations set up for this read. + * + * @return a list of the operations set up for this read. + */ public List getOps() { return ops; } + /** + * Check whether or not this read has been clipped. + * @return true if this read has produced a clipped read, false otherwise. + */ public boolean wasClipped() { return wasClipped; } + /** + * The original read. + * + * @return returns the read to be clipped (original) + */ public GATKSAMRecord getRead() { return read; } /** - * Return a new read corresponding to this.read that's been clipped according to ops, if any are present. + * Creates a new read that's been clipped according to ops and the chosen algorithm. + * The original read is unmodified. * - * @param algorithm - * @return + * @param algorithm What mode of clipping do you want to apply for the stacked operations. + * @return a new read with the clipping applied. */ public GATKSAMRecord clipRead(ClippingRepresentation algorithm) { if (ops == null) @@ -84,41 +143,47 @@ public class ReadClipper { } - - - // QUICK USE UTILITY FUNCTION - + /** + * Hard clips the left tail of a read up to (and including) refStop using reference + * coordinates. + * + * @param refStop the last base to be hard clipped in the left tail of the read. + * @return a new read, without the left tail. + */ + @Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) { return hardClipByReferenceCoordinates(-1, refStop); } + public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) { + return (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, refStop); + } + + + /** + * Hard clips the right tail of a read starting at (and including) refStart using reference + * coordinates. + * + * @param refStart refStop the first base to be hard clipped in the right tail of the read. + * @return a new read, without the right tail. + */ + @Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip public GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) { return hardClipByReferenceCoordinates(refStart, -1); } - - @Requires("!read.getReadUnmappedFlag()") - protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { - int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); - int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); - - if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); - - if (start < 0 || stop > read.getReadLength() - 1) - throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); - - if ( start > stop ) - throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); - - if ( start > 0 && stop < read.getReadLength() - 1) - throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); - - this.addOp(new ClippingOp(start, stop)); - GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); - this.ops = null; - return clippedRead; + public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) { + return (new ReadClipper(read)).hardClipByReferenceCoordinates(refStart, -1); } + /** + * Hard clips a read using read coordinates. + * + * @param start the first base to clip (inclusive) + * @param stop the last base to clip (inclusive) + * @return a new read, without the clipped bases + */ + @Requires({"start >= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read + "start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) return new GATKSAMRecord(read.getHeader()); @@ -126,8 +191,23 @@ public class ReadClipper { this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) { + return (new ReadClipper(read)).hardClipByReadCoordinates(start, stop); + } - @Requires("left <= right") + + /** + * Hard clips both tails of a read. + * Left tail goes from the beginning to the 'left' coordinate (inclusive) + * Right tail goes from the 'right' coordinate (inclusive) until the end of the read + * + * @param left the coordinate of the last base to be clipped in the left tail (inclusive) + * @param right the coordinate of the first base to be clipped in the right tail (inclusive) + * @return a new read, without the clipped bases + */ + @Requires({"left <= right", // tails cannot overlap + "left >= read.getAlignmentStart()", // coordinate has to be within the mapped read + "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) return new GATKSAMRecord(read.getHeader()); @@ -141,7 +221,20 @@ public class ReadClipper { ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); } + public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) { + return (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(left, right); + } + + /** + * Hard clips any contiguous tail (left, right or both) with base quality lower than lowQual. + * + * This function will look for low quality tails and hard clip them away. A low quality tail + * ends when a base has base quality greater than lowQual. + * + * @param lowQual every base quality lower than or equal to this in the tail of the read will be hard clipped + * @return a new read without low quality tails + */ public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { if (read.isEmpty()) return read; @@ -154,7 +247,7 @@ public class ReadClipper { while (rightClipIndex >= 0 && quals[rightClipIndex] <= lowQual) rightClipIndex--; while (leftClipIndex < read.getReadLength() && quals[leftClipIndex] <= lowQual) leftClipIndex++; - // if the entire read should be clipped, then return an empty read. (--todo: maybe null is better? testing this for now) + // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) return (new GATKSAMRecord(read.getHeader())); @@ -166,7 +259,16 @@ public class ReadClipper { } return this.clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) { + return (new ReadClipper(read)).hardClipLowQualEnds(lowQual); + } + + /** + * Will hard clip every soft clipped bases in the read. + * + * @return a new read without the soft clipped bases + */ public GATKSAMRecord hardClipSoftClippedBases () { if (read.isEmpty()) return read; @@ -200,7 +302,18 @@ public class ReadClipper { return clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public static GATKSAMRecord hardClipSoftClippedBases (GATKSAMRecord read) { + return (new ReadClipper(read)).hardClipSoftClippedBases(); + } + + /** + * Checks if a read contains adaptor sequences. If it does, hard clips them out. + * + * Note: To see how a read is checked for adaptor sequence see ReadUtils.getAdaptorBoundary() + * + * @return a new read without adaptor sequence + */ public GATKSAMRecord hardClipAdaptorSequence () { final Integer adaptorBoundary = ReadUtils.getAdaptorBoundary(read); @@ -209,7 +322,16 @@ public class ReadClipper { return read.getReadNegativeStrandFlag() ? hardClipByReferenceCoordinatesLeftTail(adaptorBoundary) : hardClipByReferenceCoordinatesRightTail(adaptorBoundary); } + public static GATKSAMRecord hardClipAdaptorSequence (GATKSAMRecord read) { + return (new ReadClipper(read)).hardClipAdaptorSequence(); + } + + /** + * Hard clips any leading insertions in the read. Only looks at the beginning of the read, not the end. + * + * @return a new read without leading insertions + */ public GATKSAMRecord hardClipLeadingInsertions() { if (read.isEmpty()) return read; @@ -225,49 +347,54 @@ public class ReadClipper { } return clipRead(ClippingRepresentation.HARDCLIP_BASES); } - - public GATKSAMRecord revertSoftClippedBases() { - this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates - return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); - } - - - - // STATIC VERSIONS OF THE QUICK CLIPPING FUNCTIONS - - public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) { - return (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, refStop); - } - - public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) { - return (new ReadClipper(read)).hardClipByReferenceCoordinates(refStart, -1); - } - - public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) { - return (new ReadClipper(read)).hardClipByReadCoordinates(start, stop); - } - - public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) { - return (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(left, right); - } - - public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) { - return (new ReadClipper(read)).hardClipLowQualEnds(lowQual); - } - - public static GATKSAMRecord hardClipSoftClippedBases (GATKSAMRecord read) { - return (new ReadClipper(read)).hardClipSoftClippedBases(); - } - - public static GATKSAMRecord hardClipAdaptorSequence (GATKSAMRecord read) { - return (new ReadClipper(read)).hardClipAdaptorSequence(); - } - public static GATKSAMRecord hardClipLeadingInsertions(GATKSAMRecord read) { return (new ReadClipper(read)).hardClipLeadingInsertions(); } + + /** + * Turns soft clipped bases into matches + * + * @return a new read with every soft clip turned into a match + */ + public GATKSAMRecord revertSoftClippedBases() { + this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates + return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); + } public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) { return (new ReadClipper(read)).revertSoftClippedBases(); } + + /** + * Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail + * and hardClipByReferenceCoordinatesRightTail. Should not be used directly. + * + * @param refStart first base to clip (inclusive) + * @param refStop last base to clip (inclusive) + * @return a new read, without the clipped bases + */ + @Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip + protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { + int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); + int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); + + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + + if (start < 0 || stop > read.getReadLength() - 1) + throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); + + if ( start > stop ) + throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + + if ( start > 0 && stop < read.getReadLength() - 1) + throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); + + this.addOp(new ClippingOp(start, stop)); + GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); + this.ops = null; + return clippedRead; + } + + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 83c3d3a1e..fa0b62cfd 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -145,7 +145,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { } } - @Test + @Test(enabled = false) public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + @@ -157,7 +157,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing SnpEff annotations", spec); } - @Test + @Test(enabled = false) public void testSnpEffAnnotationsUnsupportedVersion() { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index ba33108cf..3c6131d6c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -298,7 +298,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test MultiSample Phase1 indels with complicated records", spec4); } - @Test + @Test(enabled = false) public void testSnpEffAnnotationRequestedWithoutRodBinding() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 4e3d38c4f..3ef4e5e9f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -14,7 +14,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; - @Test + @Test(enabled = false) public void testFunctionClassWithSnpeff() { WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine(