Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2011-12-26 14:45:35 -05:00
commit dd990061f6
5 changed files with 212 additions and 79 deletions

View File

@ -204,6 +204,11 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
throw new UserException("SnpEff support is currently disabled in the GATK until SnpEff 2.0.4 is officially released " +
"due to a serious issue with SnpEff versions prior to 2.0.4. Please see this page for more details: " +
"http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator");
/*
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile):
validateRodBinding(walker.getSnpEffRodBinding());
@ -223,6 +228,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// mistaken in the future for a SnpEff output file:
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
*/
}
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {

View File

@ -11,17 +11,53 @@ import java.util.ArrayList;
import java.util.List;
/**
* A simple collection of the clipping operations to apply to a read along with its read
* A comprehensive clipping tool.
*
* General Contract:
* - All clipping operations return a new read with the clipped bases requested, it never modifies the original read.
* - If a read is fully clipped, return an empty GATKSAMRecord, never null.
* - When hard clipping, add cigar operator H for every *reference base* removed (i.e. Matches, SoftClips and Deletions, but *not* insertions). See Hard Clipping notes for details.
*
*
* There are several types of clipping to use:
*
* Write N's:
* Change the bases to N's in the desired region. This can be applied anywhere in the read.
*
* Write Q0's:
* Change the quality of the bases in the desired region to Q0. This can be applied anywhere in the read.
*
* Write both N's and Q0's:
* Same as the two independent operations, put together.
*
* Soft Clipping:
* Do not change the read, just mark the reads as soft clipped in the Cigar String
* and adjust the alignment start and end of the read.
*
* Hard Clipping:
* Creates a new read without the hard clipped bases (and base qualities). The cigar string
* will be updated with the cigar operator H for every reference base removed (i.e. Matches,
* Soft clipped bases and deletions, but *not* insertions). This contract with the cigar
* is necessary to allow read.getUnclippedStart() / End() to recover the original alignment
* of the read (before clipping).
*
*/
public class ReadClipper {
GATKSAMRecord read;
final GATKSAMRecord read;
boolean wasClipped;
List<ClippingOp> ops = null;
/**
* We didn't do any clipping work on this read, just leave everything as a default
* Initializes a ReadClipper object.
*
* @param read
* You can set up your clipping operations using the addOp method. When you're ready to
* generate a new read with all the clipping operations, use clipRead().
*
* Note: Use this if you want to set up multiple operations on the read using the ClippingOp
* class. If you just want to apply one of the typical modes of clipping, use the static
* clipping functions available in this class instead.
*
* @param read the read to clip
*/
public ReadClipper(final GATKSAMRecord read) {
this.read = read;
@ -29,32 +65,55 @@ public class ReadClipper {
}
/**
* Add another clipping operation to apply to this read
* Add clipping operation to the read.
*
* @param op
* You can add as many operations as necessary to this read before clipping. Beware that the
* order in which you add these operations matter. For example, if you hard clip the beginning
* of a read first then try to hard clip the end, the indices will have changed. Make sure you
* know what you're doing, otherwise just use the static functions below that take care of the
* ordering for you.
*
* Note: You only choose the clipping mode when you use clipRead()
*
* @param op a ClippingOp object describing the area you want to clip.
*/
public void addOp(ClippingOp op) {
if (ops == null) ops = new ArrayList<ClippingOp>();
ops.add(op);
}
/**
* Check the list of operations set up for this read.
*
* @return a list of the operations set up for this read.
*/
public List<ClippingOp> getOps() {
return ops;
}
/**
* Check whether or not this read has been clipped.
* @return true if this read has produced a clipped read, false otherwise.
*/
public boolean wasClipped() {
return wasClipped;
}
/**
* The original read.
*
* @return returns the read to be clipped (original)
*/
public GATKSAMRecord getRead() {
return read;
}
/**
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
* Creates a new read that's been clipped according to ops and the chosen algorithm.
* The original read is unmodified.
*
* @param algorithm
* @return
* @param algorithm What mode of clipping do you want to apply for the stacked operations.
* @return a new read with the clipping applied.
*/
public GATKSAMRecord clipRead(ClippingRepresentation algorithm) {
if (ops == null)
@ -84,41 +143,47 @@ public class ReadClipper {
}
// QUICK USE UTILITY FUNCTION
/**
* Hard clips the left tail of a read up to (and including) refStop using reference
* coordinates.
*
* @param refStop the last base to be hard clipped in the left tail of the read.
* @return a new read, without the left tail.
*/
@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
return hardClipByReferenceCoordinates(-1, refStop);
}
public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) {
return (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, refStop);
}
/**
* Hard clips the right tail of a read starting at (and including) refStart using reference
* coordinates.
*
* @param refStart refStop the first base to be hard clipped in the right tail of the read.
* @return a new read, without the right tail.
*/
@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
public GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
return hardClipByReferenceCoordinates(refStart, -1);
}
@Requires("!read.getReadUnmappedFlag()")
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
if ( start > stop )
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
if ( start > 0 && stop < read.getReadLength() - 1)
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
this.addOp(new ClippingOp(start, stop));
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) {
return (new ReadClipper(read)).hardClipByReferenceCoordinates(refStart, -1);
}
/**
* Hard clips a read using read coordinates.
*
* @param start the first base to clip (inclusive)
* @param stop the last base to clip (inclusive)
* @return a new read, without the clipped bases
*/
@Requires({"start >= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read
"start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
@ -126,8 +191,23 @@ public class ReadClipper {
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) {
return (new ReadClipper(read)).hardClipByReadCoordinates(start, stop);
}
@Requires("left <= right")
/**
* Hard clips both tails of a read.
* Left tail goes from the beginning to the 'left' coordinate (inclusive)
* Right tail goes from the 'right' coordinate (inclusive) until the end of the read
*
* @param left the coordinate of the last base to be clipped in the left tail (inclusive)
* @param right the coordinate of the first base to be clipped in the right tail (inclusive)
* @return a new read, without the clipped bases
*/
@Requires({"left <= right", // tails cannot overlap
"left >= read.getAlignmentStart()", // coordinate has to be within the mapped read
"right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read
public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
if (read.isEmpty() || left == right)
return new GATKSAMRecord(read.getHeader());
@ -141,7 +221,20 @@ public class ReadClipper {
ReadClipper clipper = new ReadClipper(leftTailRead);
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
}
public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) {
return (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(left, right);
}
/**
* Hard clips any contiguous tail (left, right or both) with base quality lower than lowQual.
*
* This function will look for low quality tails and hard clip them away. A low quality tail
* ends when a base has base quality greater than lowQual.
*
* @param lowQual every base quality lower than or equal to this in the tail of the read will be hard clipped
* @return a new read without low quality tails
*/
public GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
if (read.isEmpty())
return read;
@ -154,7 +247,7 @@ public class ReadClipper {
while (rightClipIndex >= 0 && quals[rightClipIndex] <= lowQual) rightClipIndex--;
while (leftClipIndex < read.getReadLength() && quals[leftClipIndex] <= lowQual) leftClipIndex++;
// if the entire read should be clipped, then return an empty read. (--todo: maybe null is better? testing this for now)
// if the entire read should be clipped, then return an empty read.
if (leftClipIndex > rightClipIndex)
return (new GATKSAMRecord(read.getHeader()));
@ -166,7 +259,16 @@ public class ReadClipper {
}
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) {
return (new ReadClipper(read)).hardClipLowQualEnds(lowQual);
}
/**
* Will hard clip every soft clipped bases in the read.
*
* @return a new read without the soft clipped bases
*/
public GATKSAMRecord hardClipSoftClippedBases () {
if (read.isEmpty())
return read;
@ -200,7 +302,18 @@ public class ReadClipper {
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public static GATKSAMRecord hardClipSoftClippedBases (GATKSAMRecord read) {
return (new ReadClipper(read)).hardClipSoftClippedBases();
}
/**
* Checks if a read contains adaptor sequences. If it does, hard clips them out.
*
* Note: To see how a read is checked for adaptor sequence see ReadUtils.getAdaptorBoundary()
*
* @return a new read without adaptor sequence
*/
public GATKSAMRecord hardClipAdaptorSequence () {
final Integer adaptorBoundary = ReadUtils.getAdaptorBoundary(read);
@ -209,7 +322,16 @@ public class ReadClipper {
return read.getReadNegativeStrandFlag() ? hardClipByReferenceCoordinatesLeftTail(adaptorBoundary) : hardClipByReferenceCoordinatesRightTail(adaptorBoundary);
}
public static GATKSAMRecord hardClipAdaptorSequence (GATKSAMRecord read) {
return (new ReadClipper(read)).hardClipAdaptorSequence();
}
/**
* Hard clips any leading insertions in the read. Only looks at the beginning of the read, not the end.
*
* @return a new read without leading insertions
*/
public GATKSAMRecord hardClipLeadingInsertions() {
if (read.isEmpty())
return read;
@ -225,49 +347,54 @@ public class ReadClipper {
}
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public GATKSAMRecord revertSoftClippedBases() {
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
}
// STATIC VERSIONS OF THE QUICK CLIPPING FUNCTIONS
public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) {
return (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, refStop);
}
public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) {
return (new ReadClipper(read)).hardClipByReferenceCoordinates(refStart, -1);
}
public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) {
return (new ReadClipper(read)).hardClipByReadCoordinates(start, stop);
}
public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) {
return (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(left, right);
}
public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) {
return (new ReadClipper(read)).hardClipLowQualEnds(lowQual);
}
public static GATKSAMRecord hardClipSoftClippedBases (GATKSAMRecord read) {
return (new ReadClipper(read)).hardClipSoftClippedBases();
}
public static GATKSAMRecord hardClipAdaptorSequence (GATKSAMRecord read) {
return (new ReadClipper(read)).hardClipAdaptorSequence();
}
public static GATKSAMRecord hardClipLeadingInsertions(GATKSAMRecord read) {
return (new ReadClipper(read)).hardClipLeadingInsertions();
}
/**
* Turns soft clipped bases into matches
*
* @return a new read with every soft clip turned into a match
*/
public GATKSAMRecord revertSoftClippedBases() {
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
}
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
return (new ReadClipper(read)).revertSoftClippedBases();
}
/**
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
*
* @param refStart first base to clip (inclusive)
* @param refStop last base to clip (inclusive)
* @return a new read, without the clipped bases
*/
@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
if ( start > stop )
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
if ( start > 0 && stop < read.getReadLength() - 1)
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
this.addOp(new ClippingOp(start, stop));
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
}
}

View File

@ -145,7 +145,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
}
}
@Test
@Test(enabled = false)
public void testSnpEffAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
@ -157,7 +157,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("Testing SnpEff annotations", spec);
}
@Test
@Test(enabled = false)
public void testSnpEffAnnotationsUnsupportedVersion() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +

View File

@ -298,7 +298,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}
@Test
@Test(enabled = false)
public void testSnpEffAnnotationRequestedWithoutRodBinding() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +

View File

@ -14,7 +14,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String cmdRoot = "-T VariantEval" +
" -R " + b36KGReference;
@Test
@Test(enabled = false)
public void testFunctionClassWithSnpeff() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(