From 41aba491c0412da2a79c5fc37a9b347f5077fcbf Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 27 Jun 2013 15:44:53 -0400 Subject: [PATCH] Critical bugfix for adapter clipping in HaplotypeCaller -- The previous code would adapter clip before reverting soft clips, so because we only clip the adapter when it's actually aligned (i.e., not in the soft clips) we were actually not removing bases in the adapter unless at least 1 bp of the adapter was aligned to the reference. Terrible. -- Removed the broken logic of determining whether a read adaptor is too long. -- Doesn't require isProperPairFlag to be set for a read to be adapter clipped -- Update integration tests for new adapter clipping code --- .../haplotypecaller/HaplotypeCaller.java | 40 ++++---- ...lexAndSymbolicVariantsIntegrationTest.java | 2 +- .../HaplotypeCallerIntegrationTest.java | 4 +- ...aplotypeCallerParallelIntegrationTest.java | 2 +- .../sting/utils/sam/ReadUtils.java | 69 ++++++++------ .../fragments/FragmentUtilsUnitTest.java | 11 ++- .../LocusIteratorByStateUnitTest.java | 32 ++----- .../sting/utils/sam/ReadUtilsUnitTest.java | 95 +++++++++++++++++++ 8 files changed, 177 insertions(+), 78 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index db1ca552a..e1398d48f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -929,28 +929,28 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // Loop through the reads hard clipping the adaptor and low quality tails final List readsToUse = new ArrayList<>(activeRegion.getReads().size()); for( final GATKSAMRecord myRead : activeRegion.getReads() ) { - final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); - if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { - GATKSAMRecord clippedRead; - if (errorCorrectReads) - clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION ); - else if (useLowQualityBasesForAssembly) - clippedRead = postAdapterRead; - else // default case: clip low qual ends of reads - clippedRead= ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY ); + GATKSAMRecord clippedRead; + if (errorCorrectReads) + clippedRead = ReadClipper.hardClipLowQualEnds( myRead, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION ); + else if (useLowQualityBasesForAssembly) + clippedRead = myRead; + else // default case: clip low qual ends of reads + clippedRead= ReadClipper.hardClipLowQualEnds( myRead, MIN_TAIL_QUALITY ); - if ( dontUseSoftClippedBases ) { - // uncomment to remove hard clips from consideration at all - clippedRead = ReadClipper.hardClipSoftClippedBases(clippedRead); - } else { - // revert soft clips so that we see the alignment start and end assuming the soft clips are all matches - // TODO -- WARNING -- still possibility that unclipping the soft clips will introduce bases that aren't - // TODO -- truly in the extended region, as the unclipped bases might actually include a deletion - // TODO -- w.r.t. the reference. What really needs to happen is that kmers that occur before the - // TODO -- reference haplotype start must be removed - clippedRead = ReadClipper.revertSoftClippedBases(clippedRead); - } + if ( dontUseSoftClippedBases || ! ReadUtils.hasWellDefinedFragmentSize(clippedRead) ) { + // remove soft clips if we cannot reliably clip off adapter sequence or if the user doesn't want to use soft clips at all + clippedRead = ReadClipper.hardClipSoftClippedBases(clippedRead); + } else { + // revert soft clips so that we see the alignment start and end assuming the soft clips are all matches + // TODO -- WARNING -- still possibility that unclipping the soft clips will introduce bases that aren't + // TODO -- truly in the extended region, as the unclipped bases might actually include a deletion + // TODO -- w.r.t. the reference. What really needs to happen is that kmers that occur before the + // TODO -- reference haplotype start must be removed + clippedRead = ReadClipper.revertSoftClippedBases(clippedRead); + } + clippedRead = ( clippedRead.getReadUnmappedFlag() ? clippedRead : ReadClipper.hardClipAdaptorSequence( clippedRead ) ); + if( clippedRead != null && !clippedRead.isEmpty() && clippedRead.getCigar().getReadLength() > 0 ) { clippedRead = ReadClipper.hardClipToRegion( clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop() ); if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) { //logger.info("Keeping read " + clippedRead + " start " + clippedRead.getAlignmentStart() + " end " + clippedRead.getAlignmentEnd()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 0636d7c1b..68e61757f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a3479fc4ad387d381593b328f737a1b"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "12ed9d67139e7a94d67e9e6c06ac6e16"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index aca1172d4..bf3517e3b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("cc6f2a76ee97ecc14a5f956ffbb21d88")); + Arrays.asList("c5c63d03e1c4bbe32f06902acd4a10f9")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("51e91c8af61a6b47807165906baefb00")); + Arrays.asList("f0b2a96040429908cce17327442eec29")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index d009550f4..3b17725f9 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "9da4cc89590c4c64a36f4a9c820f8609"}); + tests.add(new Object[]{nct, "e800f6bb3a820da5c6b29f0195480796"}); } return tests.toArray(new Object[][]{}); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index f9393cc4b..ab866013f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -30,10 +30,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.*; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.NGSPlatform; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -214,34 +211,52 @@ public class ReadUtils { * CANNOT_COMPUTE_ADAPTOR_BOUNDARY if the read is unmapped or the mate is mapped to another contig. */ public static int getAdaptorBoundary(final SAMRecord read) { - final int MAXIMUM_ADAPTOR_LENGTH = 8; - final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value) - - if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs - return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; - - if ( read.getReadPairedFlag() && read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) { - // note that the read.getProperPairFlag() is not reliably set, so many reads may have this tag but still be overlapping -// logger.info(String.format("Read %s start=%d end=%d insert=%d mateStart=%d readNeg=%b mateNeg=%b not properly paired, returning CANNOT_COMPUTE_ADAPTOR_BOUNDARY", -// read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), insertSize, read.getMateAlignmentStart(), -// read.getReadNegativeStrandFlag(), read.getMateNegativeStrandFlag())); + if ( ! hasWellDefinedFragmentSize(read) ) { return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; + } else if ( read.getReadNegativeStrandFlag() ) { + return read.getMateAlignmentStart() - 1; // case 1 (see header) + } else { + final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value) + return read.getAlignmentStart() + insertSize + 1; // case 2 (see header) } - - - int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read) - if (read.getReadNegativeStrandFlag()) - adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) - else - adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header) - - if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) ) - adaptorBoundary = CANNOT_COMPUTE_ADAPTOR_BOUNDARY; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor - - return adaptorBoundary; } + public static int CANNOT_COMPUTE_ADAPTOR_BOUNDARY = Integer.MIN_VALUE; + /** + * Can the adaptor sequence of read be reliably removed from the read based on the alignment of + * read and its mate? + * + * @param read the read to check + * @return true if it can, false otherwise + */ + public static boolean hasWellDefinedFragmentSize(final SAMRecord read) { + if ( read.getInferredInsertSize() == 0 ) + // no adaptors in reads with mates in another chromosome or unmapped pairs + return false; + if ( ! read.getReadPairedFlag() ) + // only reads that are paired can be adaptor trimmed + return false; + if ( read.getReadUnmappedFlag() || read.getMateUnmappedFlag() ) + // only reads when both reads are mapped can be trimmed + return false; +// if ( ! read.getProperPairFlag() ) +// // note this flag isn't always set properly in BAMs, can will stop us from eliminating some proper pairs +// // reads that aren't part of a proper pair (i.e., have strange alignments) can't be trimmed +// return false; + if ( read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) + // sanity check on getProperPairFlag to ensure that read1 and read2 aren't on the same strand + return false; + + if ( read.getReadNegativeStrandFlag() ) { + // we're on the negative strand, so our read runs right to left + return read.getAlignmentEnd() > read.getMateAlignmentStart(); + } else { + // we're on the positive strand, so our mate should be to our right (his start + insert size should be past our start) + return read.getAlignmentStart() <= read.getMateAlignmentStart() + read.getInferredInsertSize(); + } + } + /** * is the read a 454 read? * diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index 0886427ca..e2e253d0f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -253,17 +253,22 @@ public class FragmentUtilsUnitTest extends BaseTest { final GATKSAMRecord expectedMerged = makeOverlappingRead("", 30, common, commonQuals, "", 30, 10); read1.setCigarString("4S" + common.length() + "M"); read1.setProperPairFlag(true); + read1.setReadPairedFlag(true); read1.setFirstOfPairFlag(true); read1.setReadNegativeStrandFlag(true); - read1.setMateAlignmentStart(10); + read1.setMateNegativeStrandFlag(false); + read1.setMateAlignmentStart(read2.getAlignmentStart()); read2.setCigarString(common.length() + "M4S"); read2.setProperPairFlag(true); + read2.setReadPairedFlag(true); read2.setFirstOfPairFlag(false); read2.setReadNegativeStrandFlag(false); + read2.setMateNegativeStrandFlag(true); + read2.setMateAlignmentStart(read1.getAlignmentStart()); final int insertSize = common.length() - 1; - read1.setInferredInsertSize(insertSize); - read2.setInferredInsertSize(-insertSize); + read1.setInferredInsertSize(-insertSize); + read2.setInferredInsertSize(insertSize); final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString()); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index d2f29ee7a..54554ee64 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -54,7 +54,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { private static final boolean DEBUG = false; protected LocusIteratorByState li; - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testUnmappedAndAllIReadsPassThrough() { final int readLength = 10; GATKSAMRecord mapped1 = ArtificialSAMUtils.createArtificialRead(header,"mapped1",0,1,readLength); @@ -697,24 +697,28 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { final List tests = new LinkedList(); final int start = 10; -// for ( final int goodBases : Arrays.asList(10) ) { -// for ( final int nClipsOnTheRight : Arrays.asList(0)) { for ( final int goodBases : Arrays.asList(10, 20, 30) ) { for ( final int nClips : Arrays.asList(0, 1, 2, 10)) { for ( final boolean onLeft : Arrays.asList(true, false) ) { final int readLength = nClips + goodBases; GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength); + read.setProperPairFlag(true); + read.setReadPairedFlag(true); + read.setReadUnmappedFlag(false); + read.setMateUnmappedFlag(false); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte) '@', readLength)); read.setCigarString(readLength + "M"); if ( onLeft ) { read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); read.setMateAlignmentStart(start + nClips); read.setInferredInsertSize(readLength); tests.add(new Object[]{nClips, goodBases, 0, read}); } else { read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); read.setMateAlignmentStart(start - 1); read.setInferredInsertSize(goodBases - 1); tests.add(new Object[]{0, goodBases, nClips, read}); @@ -723,29 +727,13 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { } } -// for ( final int nClipsOnTheLeft : Arrays.asList(0, 1, 2, 10)) { -// final int readLength = nClipsOnTheLeft + goodBases; -// GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength); -// read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); -// read.setBaseQualities(Utils.dupBytes((byte) '@', readLength)); -// read.setCigarString(readLength + "M"); -// read.setReadNegativeStrandFlag(true); -// -// read.setMateAlignmentStart(start + nClipsOnTheLeft); -// read.setInferredInsertSize(readLength); -// -// tests.add(new Object[]{nClipsOnTheLeft, goodBases, 0, read}); -// } -// } - return tests.toArray(new Object[][]{}); } @Test(enabled = true, dataProvider = "AdapterClippingTest") -// @Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads", timeOut = 100000) public void testAdapterClipping(final int nClipsOnLeft, final int nReadContainingPileups, final int nClipsOnRight, final GATKSAMRecord read) { - li = new LocusIteratorByState(new FakeCloseableIterator(Collections.singletonList(read).iterator()), + li = new LocusIteratorByState(new FakeCloseableIterator<>(Collections.singletonList(read).iterator()), createTestReadProperties(DownsamplingMethod.NONE, false), genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); @@ -755,10 +743,6 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { while ( li.hasNext() ) { final AlignmentContext next = li.next(); Assert.assertEquals(next.getLocation().getStart(), expectedPos); -// if ( nPileups < nClipsOnLeft || nPileups > (nClipsOnLeft + nReadContainingPileups) ) -// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 0, "Expected empty pileups when the read is in the adapter clipping zone at " + nPileups); -// else -// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 1, "Expected a pileups with 1 element when the read is within the good part of the read at " + nPileups); nPileups++; expectedPos++; } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index abe0c394b..7e085547f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -66,6 +66,8 @@ public class ReadUtilsUnitTest extends BaseTest { final byte[] quals = {30, 30, 30, 30, 30, 30, 30, 30}; final String cigar = "8M"; GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar); + read.setProperPairFlag(true); + read.setReadPairedFlag(true); read.setMateAlignmentStart(mateStart); read.setInferredInsertSize(fragmentSize); return read; @@ -85,6 +87,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = BEFORE; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, myStart + fragmentSize + 1); @@ -93,6 +96,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = AFTER; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, myStart + fragmentSize + 1); @@ -101,6 +105,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = AFTER; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, mateStart - 1); @@ -109,6 +114,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = BEFORE; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, mateStart - 1); @@ -116,9 +122,11 @@ public class ReadUtilsUnitTest extends BaseTest { read = makeRead(fragmentSize, mateStart); read.setInferredInsertSize(0); read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); read.setInferredInsertSize(10); @@ -226,4 +234,91 @@ public class ReadUtilsUnitTest extends BaseTest { final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9392, ReadUtils.ClippingTail.LEFT_TAIL); Assert.assertEquals(result, 3); } + + @DataProvider(name = "HasWellDefinedFragmentSizeData") + public Object[][] makeHasWellDefinedFragmentSizeData() throws Exception { + final List tests = new LinkedList(); + + // setup a basic read that will work + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(); + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, 10); + read.setReadPairedFlag(true); + read.setProperPairFlag(true); + read.setReadUnmappedFlag(false); + read.setMateUnmappedFlag(false); + read.setAlignmentStart(100); + read.setCigarString("50M"); + read.setMateAlignmentStart(130); + read.setInferredInsertSize(80); + read.setFirstOfPairFlag(true); + read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); + + tests.add( new Object[]{ "basic case", read.clone(), true }); + + { + final GATKSAMRecord bad1 = (GATKSAMRecord)read.clone(); + bad1.setReadPairedFlag(false); + tests.add( new Object[]{ "not paired", bad1, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setProperPairFlag(false); + // we currently don't require the proper pair flag to be set + tests.add( new Object[]{ "not proper pair", bad, true }); +// tests.add( new Object[]{ "not proper pair", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setReadUnmappedFlag(true); + tests.add( new Object[]{ "read is unmapped", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setMateUnmappedFlag(true); + tests.add( new Object[]{ "mate is unmapped", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setMateNegativeStrandFlag(false); + tests.add( new Object[]{ "read and mate both on positive strand", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setReadNegativeStrandFlag(true); + tests.add( new Object[]{ "read and mate both on negative strand", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setInferredInsertSize(0); + tests.add( new Object[]{ "insert size is 0", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setAlignmentStart(1000); + tests.add( new Object[]{ "positve read starts after mate end", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setReadNegativeStrandFlag(true); + bad.setMateNegativeStrandFlag(false); + bad.setMateAlignmentStart(1000); + tests.add( new Object[]{ "negative strand read ends before mate starts", bad, false }); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "HasWellDefinedFragmentSizeData") + private void testHasWellDefinedFragmentSize(final String name, final GATKSAMRecord read, final boolean expected) { + Assert.assertEquals(ReadUtils.hasWellDefinedFragmentSize(read), expected); + } }