diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index db1ca552a..e1398d48f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -929,28 +929,28 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // Loop through the reads hard clipping the adaptor and low quality tails final List readsToUse = new ArrayList<>(activeRegion.getReads().size()); for( final GATKSAMRecord myRead : activeRegion.getReads() ) { - final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); - if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { - GATKSAMRecord clippedRead; - if (errorCorrectReads) - clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION ); - else if (useLowQualityBasesForAssembly) - clippedRead = postAdapterRead; - else // default case: clip low qual ends of reads - clippedRead= ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY ); + GATKSAMRecord clippedRead; + if (errorCorrectReads) + clippedRead = ReadClipper.hardClipLowQualEnds( myRead, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION ); + else if (useLowQualityBasesForAssembly) + clippedRead = myRead; + else // default case: clip low qual ends of reads + clippedRead= ReadClipper.hardClipLowQualEnds( myRead, MIN_TAIL_QUALITY ); - if ( dontUseSoftClippedBases ) { - // uncomment to remove hard clips from consideration at all - clippedRead = ReadClipper.hardClipSoftClippedBases(clippedRead); - } else { - // revert soft clips so that we see the alignment start and end assuming the soft clips are all matches - // TODO -- WARNING -- still possibility that unclipping the soft clips will introduce bases that aren't - // TODO -- truly in the extended region, as the unclipped bases might actually include a deletion - // TODO -- w.r.t. the reference. What really needs to happen is that kmers that occur before the - // TODO -- reference haplotype start must be removed - clippedRead = ReadClipper.revertSoftClippedBases(clippedRead); - } + if ( dontUseSoftClippedBases || ! ReadUtils.hasWellDefinedFragmentSize(clippedRead) ) { + // remove soft clips if we cannot reliably clip off adapter sequence or if the user doesn't want to use soft clips at all + clippedRead = ReadClipper.hardClipSoftClippedBases(clippedRead); + } else { + // revert soft clips so that we see the alignment start and end assuming the soft clips are all matches + // TODO -- WARNING -- still possibility that unclipping the soft clips will introduce bases that aren't + // TODO -- truly in the extended region, as the unclipped bases might actually include a deletion + // TODO -- w.r.t. the reference. What really needs to happen is that kmers that occur before the + // TODO -- reference haplotype start must be removed + clippedRead = ReadClipper.revertSoftClippedBases(clippedRead); + } + clippedRead = ( clippedRead.getReadUnmappedFlag() ? clippedRead : ReadClipper.hardClipAdaptorSequence( clippedRead ) ); + if( clippedRead != null && !clippedRead.isEmpty() && clippedRead.getCigar().getReadLength() > 0 ) { clippedRead = ReadClipper.hardClipToRegion( clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop() ); if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) { //logger.info("Keeping read " + clippedRead + " start " + clippedRead.getAlignmentStart() + " end " + clippedRead.getAlignmentEnd()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 0636d7c1b..68e61757f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a3479fc4ad387d381593b328f737a1b"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "12ed9d67139e7a94d67e9e6c06ac6e16"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index aca1172d4..bf3517e3b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("cc6f2a76ee97ecc14a5f956ffbb21d88")); + Arrays.asList("c5c63d03e1c4bbe32f06902acd4a10f9")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("51e91c8af61a6b47807165906baefb00")); + Arrays.asList("f0b2a96040429908cce17327442eec29")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index d009550f4..3b17725f9 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "9da4cc89590c4c64a36f4a9c820f8609"}); + tests.add(new Object[]{nct, "e800f6bb3a820da5c6b29f0195480796"}); } return tests.toArray(new Object[][]{}); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index f9393cc4b..ab866013f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -30,10 +30,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.*; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.NGSPlatform; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -214,34 +211,52 @@ public class ReadUtils { * CANNOT_COMPUTE_ADAPTOR_BOUNDARY if the read is unmapped or the mate is mapped to another contig. */ public static int getAdaptorBoundary(final SAMRecord read) { - final int MAXIMUM_ADAPTOR_LENGTH = 8; - final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value) - - if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs - return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; - - if ( read.getReadPairedFlag() && read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) { - // note that the read.getProperPairFlag() is not reliably set, so many reads may have this tag but still be overlapping -// logger.info(String.format("Read %s start=%d end=%d insert=%d mateStart=%d readNeg=%b mateNeg=%b not properly paired, returning CANNOT_COMPUTE_ADAPTOR_BOUNDARY", -// read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), insertSize, read.getMateAlignmentStart(), -// read.getReadNegativeStrandFlag(), read.getMateNegativeStrandFlag())); + if ( ! hasWellDefinedFragmentSize(read) ) { return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; + } else if ( read.getReadNegativeStrandFlag() ) { + return read.getMateAlignmentStart() - 1; // case 1 (see header) + } else { + final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value) + return read.getAlignmentStart() + insertSize + 1; // case 2 (see header) } - - - int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read) - if (read.getReadNegativeStrandFlag()) - adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) - else - adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header) - - if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) ) - adaptorBoundary = CANNOT_COMPUTE_ADAPTOR_BOUNDARY; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor - - return adaptorBoundary; } + public static int CANNOT_COMPUTE_ADAPTOR_BOUNDARY = Integer.MIN_VALUE; + /** + * Can the adaptor sequence of read be reliably removed from the read based on the alignment of + * read and its mate? + * + * @param read the read to check + * @return true if it can, false otherwise + */ + public static boolean hasWellDefinedFragmentSize(final SAMRecord read) { + if ( read.getInferredInsertSize() == 0 ) + // no adaptors in reads with mates in another chromosome or unmapped pairs + return false; + if ( ! read.getReadPairedFlag() ) + // only reads that are paired can be adaptor trimmed + return false; + if ( read.getReadUnmappedFlag() || read.getMateUnmappedFlag() ) + // only reads when both reads are mapped can be trimmed + return false; +// if ( ! read.getProperPairFlag() ) +// // note this flag isn't always set properly in BAMs, can will stop us from eliminating some proper pairs +// // reads that aren't part of a proper pair (i.e., have strange alignments) can't be trimmed +// return false; + if ( read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) + // sanity check on getProperPairFlag to ensure that read1 and read2 aren't on the same strand + return false; + + if ( read.getReadNegativeStrandFlag() ) { + // we're on the negative strand, so our read runs right to left + return read.getAlignmentEnd() > read.getMateAlignmentStart(); + } else { + // we're on the positive strand, so our mate should be to our right (his start + insert size should be past our start) + return read.getAlignmentStart() <= read.getMateAlignmentStart() + read.getInferredInsertSize(); + } + } + /** * is the read a 454 read? * diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index 0886427ca..e2e253d0f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -253,17 +253,22 @@ public class FragmentUtilsUnitTest extends BaseTest { final GATKSAMRecord expectedMerged = makeOverlappingRead("", 30, common, commonQuals, "", 30, 10); read1.setCigarString("4S" + common.length() + "M"); read1.setProperPairFlag(true); + read1.setReadPairedFlag(true); read1.setFirstOfPairFlag(true); read1.setReadNegativeStrandFlag(true); - read1.setMateAlignmentStart(10); + read1.setMateNegativeStrandFlag(false); + read1.setMateAlignmentStart(read2.getAlignmentStart()); read2.setCigarString(common.length() + "M4S"); read2.setProperPairFlag(true); + read2.setReadPairedFlag(true); read2.setFirstOfPairFlag(false); read2.setReadNegativeStrandFlag(false); + read2.setMateNegativeStrandFlag(true); + read2.setMateAlignmentStart(read1.getAlignmentStart()); final int insertSize = common.length() - 1; - read1.setInferredInsertSize(insertSize); - read2.setInferredInsertSize(-insertSize); + read1.setInferredInsertSize(-insertSize); + read2.setInferredInsertSize(insertSize); final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString()); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index d2f29ee7a..54554ee64 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -54,7 +54,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { private static final boolean DEBUG = false; protected LocusIteratorByState li; - @Test(enabled = true) + @Test(enabled = !DEBUG) public void testUnmappedAndAllIReadsPassThrough() { final int readLength = 10; GATKSAMRecord mapped1 = ArtificialSAMUtils.createArtificialRead(header,"mapped1",0,1,readLength); @@ -697,24 +697,28 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { final List tests = new LinkedList(); final int start = 10; -// for ( final int goodBases : Arrays.asList(10) ) { -// for ( final int nClipsOnTheRight : Arrays.asList(0)) { for ( final int goodBases : Arrays.asList(10, 20, 30) ) { for ( final int nClips : Arrays.asList(0, 1, 2, 10)) { for ( final boolean onLeft : Arrays.asList(true, false) ) { final int readLength = nClips + goodBases; GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength); + read.setProperPairFlag(true); + read.setReadPairedFlag(true); + read.setReadUnmappedFlag(false); + read.setMateUnmappedFlag(false); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte) '@', readLength)); read.setCigarString(readLength + "M"); if ( onLeft ) { read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); read.setMateAlignmentStart(start + nClips); read.setInferredInsertSize(readLength); tests.add(new Object[]{nClips, goodBases, 0, read}); } else { read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); read.setMateAlignmentStart(start - 1); read.setInferredInsertSize(goodBases - 1); tests.add(new Object[]{0, goodBases, nClips, read}); @@ -723,29 +727,13 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { } } -// for ( final int nClipsOnTheLeft : Arrays.asList(0, 1, 2, 10)) { -// final int readLength = nClipsOnTheLeft + goodBases; -// GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength); -// read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); -// read.setBaseQualities(Utils.dupBytes((byte) '@', readLength)); -// read.setCigarString(readLength + "M"); -// read.setReadNegativeStrandFlag(true); -// -// read.setMateAlignmentStart(start + nClipsOnTheLeft); -// read.setInferredInsertSize(readLength); -// -// tests.add(new Object[]{nClipsOnTheLeft, goodBases, 0, read}); -// } -// } - return tests.toArray(new Object[][]{}); } @Test(enabled = true, dataProvider = "AdapterClippingTest") -// @Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads", timeOut = 100000) public void testAdapterClipping(final int nClipsOnLeft, final int nReadContainingPileups, final int nClipsOnRight, final GATKSAMRecord read) { - li = new LocusIteratorByState(new FakeCloseableIterator(Collections.singletonList(read).iterator()), + li = new LocusIteratorByState(new FakeCloseableIterator<>(Collections.singletonList(read).iterator()), createTestReadProperties(DownsamplingMethod.NONE, false), genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); @@ -755,10 +743,6 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { while ( li.hasNext() ) { final AlignmentContext next = li.next(); Assert.assertEquals(next.getLocation().getStart(), expectedPos); -// if ( nPileups < nClipsOnLeft || nPileups > (nClipsOnLeft + nReadContainingPileups) ) -// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 0, "Expected empty pileups when the read is in the adapter clipping zone at " + nPileups); -// else -// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 1, "Expected a pileups with 1 element when the read is within the good part of the read at " + nPileups); nPileups++; expectedPos++; } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index abe0c394b..7e085547f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -66,6 +66,8 @@ public class ReadUtilsUnitTest extends BaseTest { final byte[] quals = {30, 30, 30, 30, 30, 30, 30, 30}; final String cigar = "8M"; GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar); + read.setProperPairFlag(true); + read.setReadPairedFlag(true); read.setMateAlignmentStart(mateStart); read.setInferredInsertSize(fragmentSize); return read; @@ -85,6 +87,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = BEFORE; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, myStart + fragmentSize + 1); @@ -93,6 +96,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = AFTER; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, myStart + fragmentSize + 1); @@ -101,6 +105,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = AFTER; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, mateStart - 1); @@ -109,6 +114,7 @@ public class ReadUtilsUnitTest extends BaseTest { myStart = BEFORE; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, mateStart - 1); @@ -116,9 +122,11 @@ public class ReadUtilsUnitTest extends BaseTest { read = makeRead(fragmentSize, mateStart); read.setInferredInsertSize(0); read.setReadNegativeStrandFlag(true); + read.setMateNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); read.setInferredInsertSize(10); @@ -226,4 +234,91 @@ public class ReadUtilsUnitTest extends BaseTest { final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9392, ReadUtils.ClippingTail.LEFT_TAIL); Assert.assertEquals(result, 3); } + + @DataProvider(name = "HasWellDefinedFragmentSizeData") + public Object[][] makeHasWellDefinedFragmentSizeData() throws Exception { + final List tests = new LinkedList(); + + // setup a basic read that will work + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(); + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, 10); + read.setReadPairedFlag(true); + read.setProperPairFlag(true); + read.setReadUnmappedFlag(false); + read.setMateUnmappedFlag(false); + read.setAlignmentStart(100); + read.setCigarString("50M"); + read.setMateAlignmentStart(130); + read.setInferredInsertSize(80); + read.setFirstOfPairFlag(true); + read.setReadNegativeStrandFlag(false); + read.setMateNegativeStrandFlag(true); + + tests.add( new Object[]{ "basic case", read.clone(), true }); + + { + final GATKSAMRecord bad1 = (GATKSAMRecord)read.clone(); + bad1.setReadPairedFlag(false); + tests.add( new Object[]{ "not paired", bad1, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setProperPairFlag(false); + // we currently don't require the proper pair flag to be set + tests.add( new Object[]{ "not proper pair", bad, true }); +// tests.add( new Object[]{ "not proper pair", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setReadUnmappedFlag(true); + tests.add( new Object[]{ "read is unmapped", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setMateUnmappedFlag(true); + tests.add( new Object[]{ "mate is unmapped", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setMateNegativeStrandFlag(false); + tests.add( new Object[]{ "read and mate both on positive strand", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setReadNegativeStrandFlag(true); + tests.add( new Object[]{ "read and mate both on negative strand", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setInferredInsertSize(0); + tests.add( new Object[]{ "insert size is 0", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setAlignmentStart(1000); + tests.add( new Object[]{ "positve read starts after mate end", bad, false }); + } + + { + final GATKSAMRecord bad = (GATKSAMRecord)read.clone(); + bad.setReadNegativeStrandFlag(true); + bad.setMateNegativeStrandFlag(false); + bad.setMateAlignmentStart(1000); + tests.add( new Object[]{ "negative strand read ends before mate starts", bad, false }); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "HasWellDefinedFragmentSizeData") + private void testHasWellDefinedFragmentSize(final String name, final GATKSAMRecord read, final boolean expected) { + Assert.assertEquals(ReadUtils.hasWellDefinedFragmentSize(read), expected); + } }