diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 17f04971b..5fe4e6dfa 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd51f8c7235eb6547b678093c7a01089"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "27db36467d40c3cde201f5826e959d78"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 2664f3ed0..fff1c0bb9 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "c1530f2158cb41d50e830ca5be0f97a0"); + HCTest(NA12878_BAM, "", "18d5671d8454e8a0c05ee5f6e9fabfe3"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("eb5772b825120a0b8710e5add485d73a")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cb190c935541ebb9f660f713a882b922")); executeTest("HCTestStructuralIndels: ", spec); } diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 99f1d99c7..5d882ba8c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -31,6 +31,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -213,24 +214,28 @@ public final class FragmentUtils { * * Assumes that firstRead starts before secondRead (according to their soft clipped starts) * - * @param firstRead the left most read - * @param firstRead the right most read + * @param unclippedFirstRead the left most read + * @param unclippedSecondRead the right most read * * @return a strandless merged read of first and second, or null if the algorithm cannot create a meaningful one */ - public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord firstRead, final GATKSAMRecord secondRead) { - if ( firstRead == null ) throw new IllegalArgumentException("firstRead cannot be null"); - if ( secondRead == null ) throw new IllegalArgumentException("secondRead cannot be null"); - if ( ! firstRead.getReadName().equals(secondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + firstRead + " and " + secondRead); + public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord unclippedFirstRead, final GATKSAMRecord unclippedSecondRead) { + if ( unclippedFirstRead == null ) throw new IllegalArgumentException("unclippedFirstRead cannot be null"); + if ( unclippedSecondRead == null ) throw new IllegalArgumentException("unclippedSecondRead cannot be null"); + if ( ! unclippedFirstRead.getReadName().equals(unclippedSecondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + unclippedFirstRead + " and " + unclippedSecondRead); + + if( unclippedFirstRead.getCigarString().contains("I") || unclippedFirstRead.getCigarString().contains("D") || unclippedSecondRead.getCigarString().contains("I") || unclippedSecondRead.getCigarString().contains("D") ) { + return null; // fragments contain indels so don't merge them + } + + final GATKSAMRecord firstRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedFirstRead)); + final GATKSAMRecord secondRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedSecondRead)); if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { return null; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A } - if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) { - return null; // fragments contain indels so don't merge them - } - final Pair pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart()); + final Pair pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getAlignmentStart()); final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() ); final int numBases = firstReadStop + secondRead.getReadLength(); @@ -264,7 +269,7 @@ public final class FragmentUtils { final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() ); returnRead.setIsStrandless(true); - returnRead.setAlignmentStart( firstRead.getSoftStart() ); + returnRead.setAlignmentStart( firstRead.getAlignmentStart() ); returnRead.setReadBases( bases ); returnRead.setBaseQualities( quals ); returnRead.setReadGroup( firstRead.getReadGroup() ); diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index 4f49eb933..e9600480a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -222,7 +222,7 @@ public class FragmentUtilsUnitTest extends BaseTest { return read; } - @Test(enabled = true, dataProvider = "MergeFragmentsTest") + @Test(enabled = !DEBUG, dataProvider = "MergeFragmentsTest") public void testMergingTwoReads(final String name, final GATKSAMRecord read1, GATKSAMRecord read2, final GATKSAMRecord expectedMerged) { final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); @@ -240,4 +240,60 @@ public class FragmentUtilsUnitTest extends BaseTest { Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type); } } + + @Test(enabled = !DEBUG) + public void testHardClippingBeforeMerge() { + final String common = Utils.dupString("A", 10); + final byte[] commonQuals = Utils.dupBytes((byte)30, common.length()); + final String adapter = "NNNN"; + + final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, common, commonQuals, "", 30, 10); + final GATKSAMRecord read2 = makeOverlappingRead("", 30, common, commonQuals, adapter, 30, 10); + final GATKSAMRecord expectedMerged = makeOverlappingRead("", 30, common, commonQuals, "", 30, 10); + read1.setCigarString("4S" + common.length() + "M"); + read1.setProperPairFlag(true); + read1.setFirstOfPairFlag(true); + read1.setReadNegativeStrandFlag(true); + read1.setMateAlignmentStart(10); + read2.setCigarString(common.length() + "M4S"); + read2.setProperPairFlag(true); + read2.setFirstOfPairFlag(false); + read2.setReadNegativeStrandFlag(false); + + final int insertSize = common.length() - 1; + read1.setInferredInsertSize(insertSize); + read2.setInferredInsertSize(-insertSize); + + final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); + Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString()); + Assert.assertEquals(actual.getReadBases(), expectedMerged.getReadBases()); + Assert.assertEquals(actual.getReadGroup(), expectedMerged.getReadGroup()); + Assert.assertEquals(actual.getMappingQuality(), expectedMerged.getMappingQuality()); + for ( final EventType type : EventType.values() ) + Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type); + } + + @Test(enabled = true) + public void testHardClippingBeforeMergeResultingInCompletelyContainedSecondRead() { + final String adapter = "NNNN"; + + final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, Utils.dupString("A", 10), Utils.dupBytes((byte)30, 10), "", 30, 10); + final GATKSAMRecord read2 = makeOverlappingRead("", 30, Utils.dupString("A", 7), Utils.dupBytes((byte)30, 7), adapter, 30, 10); + read1.setCigarString("4S10M"); + read1.setProperPairFlag(true); + read1.setFirstOfPairFlag(true); + read1.setReadNegativeStrandFlag(true); + read1.setMateAlignmentStart(10); + read2.setCigarString("7M4S"); + read2.setProperPairFlag(true); + read2.setFirstOfPairFlag(false); + read2.setReadNegativeStrandFlag(false); + + final int insertSize = 7 - 1; + read1.setInferredInsertSize(insertSize); + read2.setInferredInsertSize(-insertSize); + + final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); + Assert.assertNull(actual); + } }