Merge pull request #189 from broadinstitute/md_hc_clip_before_merging

Bugfix for FragmentUtils.mergeOverlappingPairedFragments
This commit is contained in:
Ryan Poplin 2013-04-25 08:42:03 -07:00
commit 3c7db87527
4 changed files with 76 additions and 15 deletions

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd51f8c7235eb6547b678093c7a01089");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "27db36467d40c3cde201f5826e959d78");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {

View File

@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "c1530f2158cb41d50e830ca5be0f97a0");
HCTest(NA12878_BAM, "", "18d5671d8454e8a0c05ee5f6e9fabfe3");
}
@Test(enabled = false) // can't annotate the rsID's yet
@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("eb5772b825120a0b8710e5add485d73a"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cb190c935541ebb9f660f713a882b922"));
executeTest("HCTestStructuralIndels: ", spec);
}

View File

@ -31,6 +31,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -213,24 +214,28 @@ public final class FragmentUtils {
*
* Assumes that firstRead starts before secondRead (according to their soft clipped starts)
*
* @param firstRead the left most read
* @param firstRead the right most read
* @param unclippedFirstRead the left most read
* @param unclippedSecondRead the right most read
*
* @return a strandless merged read of first and second, or null if the algorithm cannot create a meaningful one
*/
public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord firstRead, final GATKSAMRecord secondRead) {
if ( firstRead == null ) throw new IllegalArgumentException("firstRead cannot be null");
if ( secondRead == null ) throw new IllegalArgumentException("secondRead cannot be null");
if ( ! firstRead.getReadName().equals(secondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + firstRead + " and " + secondRead);
public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord unclippedFirstRead, final GATKSAMRecord unclippedSecondRead) {
if ( unclippedFirstRead == null ) throw new IllegalArgumentException("unclippedFirstRead cannot be null");
if ( unclippedSecondRead == null ) throw new IllegalArgumentException("unclippedSecondRead cannot be null");
if ( ! unclippedFirstRead.getReadName().equals(unclippedSecondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + unclippedFirstRead + " and " + unclippedSecondRead);
if( unclippedFirstRead.getCigarString().contains("I") || unclippedFirstRead.getCigarString().contains("D") || unclippedSecondRead.getCigarString().contains("I") || unclippedSecondRead.getCigarString().contains("D") ) {
return null; // fragments contain indels so don't merge them
}
final GATKSAMRecord firstRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedFirstRead));
final GATKSAMRecord secondRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedSecondRead));
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
return null; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
}
if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
return null; // fragments contain indels so don't merge them
}
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getAlignmentStart());
final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
final int numBases = firstReadStop + secondRead.getReadLength();
@ -264,7 +269,7 @@ public final class FragmentUtils {
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
returnRead.setIsStrandless(true);
returnRead.setAlignmentStart( firstRead.getSoftStart() );
returnRead.setAlignmentStart( firstRead.getAlignmentStart() );
returnRead.setReadBases( bases );
returnRead.setBaseQualities( quals );
returnRead.setReadGroup( firstRead.getReadGroup() );

View File

@ -222,7 +222,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
return read;
}
@Test(enabled = true, dataProvider = "MergeFragmentsTest")
@Test(enabled = !DEBUG, dataProvider = "MergeFragmentsTest")
public void testMergingTwoReads(final String name, final GATKSAMRecord read1, GATKSAMRecord read2, final GATKSAMRecord expectedMerged) {
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
@ -240,4 +240,60 @@ public class FragmentUtilsUnitTest extends BaseTest {
Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type);
}
}
@Test(enabled = !DEBUG)
public void testHardClippingBeforeMerge() {
final String common = Utils.dupString("A", 10);
final byte[] commonQuals = Utils.dupBytes((byte)30, common.length());
final String adapter = "NNNN";
final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, common, commonQuals, "", 30, 10);
final GATKSAMRecord read2 = makeOverlappingRead("", 30, common, commonQuals, adapter, 30, 10);
final GATKSAMRecord expectedMerged = makeOverlappingRead("", 30, common, commonQuals, "", 30, 10);
read1.setCigarString("4S" + common.length() + "M");
read1.setProperPairFlag(true);
read1.setFirstOfPairFlag(true);
read1.setReadNegativeStrandFlag(true);
read1.setMateAlignmentStart(10);
read2.setCigarString(common.length() + "M4S");
read2.setProperPairFlag(true);
read2.setFirstOfPairFlag(false);
read2.setReadNegativeStrandFlag(false);
final int insertSize = common.length() - 1;
read1.setInferredInsertSize(insertSize);
read2.setInferredInsertSize(-insertSize);
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString());
Assert.assertEquals(actual.getReadBases(), expectedMerged.getReadBases());
Assert.assertEquals(actual.getReadGroup(), expectedMerged.getReadGroup());
Assert.assertEquals(actual.getMappingQuality(), expectedMerged.getMappingQuality());
for ( final EventType type : EventType.values() )
Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type);
}
@Test(enabled = true)
public void testHardClippingBeforeMergeResultingInCompletelyContainedSecondRead() {
final String adapter = "NNNN";
final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, Utils.dupString("A", 10), Utils.dupBytes((byte)30, 10), "", 30, 10);
final GATKSAMRecord read2 = makeOverlappingRead("", 30, Utils.dupString("A", 7), Utils.dupBytes((byte)30, 7), adapter, 30, 10);
read1.setCigarString("4S10M");
read1.setProperPairFlag(true);
read1.setFirstOfPairFlag(true);
read1.setReadNegativeStrandFlag(true);
read1.setMateAlignmentStart(10);
read2.setCigarString("7M4S");
read2.setProperPairFlag(true);
read2.setFirstOfPairFlag(false);
read2.setReadNegativeStrandFlag(false);
final int insertSize = 7 - 1;
read1.setInferredInsertSize(insertSize);
read2.setInferredInsertSize(-insertSize);
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
Assert.assertNull(actual);
}
}