Bugfix for FragmentUtils.mergeOverlappingPairedFragments
-- The previous version was unclipping soft clipped bases, and these were sometimes adaptor sequences. If the two reads successfully merged, we'd lose all of the information necessary to remove the adaptor, producing a very high quality read that matched reference. Updated the code to first clip the adapter sequences from the incoming fragments -- Update MD5s
This commit is contained in:
parent
43f1746eb9
commit
d20be41fee
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd51f8c7235eb6547b678093c7a01089");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "27db36467d40c3cde201f5826e959d78");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "c1530f2158cb41d50e830ca5be0f97a0");
|
||||
HCTest(NA12878_BAM, "", "18d5671d8454e8a0c05ee5f6e9fabfe3");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("eb5772b825120a0b8710e5add485d73a"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cb190c935541ebb9f660f713a882b922"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -213,24 +214,28 @@ public final class FragmentUtils {
|
|||
*
|
||||
* Assumes that firstRead starts before secondRead (according to their soft clipped starts)
|
||||
*
|
||||
* @param firstRead the left most read
|
||||
* @param firstRead the right most read
|
||||
* @param unclippedFirstRead the left most read
|
||||
* @param unclippedSecondRead the right most read
|
||||
*
|
||||
* @return a strandless merged read of first and second, or null if the algorithm cannot create a meaningful one
|
||||
*/
|
||||
public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord firstRead, final GATKSAMRecord secondRead) {
|
||||
if ( firstRead == null ) throw new IllegalArgumentException("firstRead cannot be null");
|
||||
if ( secondRead == null ) throw new IllegalArgumentException("secondRead cannot be null");
|
||||
if ( ! firstRead.getReadName().equals(secondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + firstRead + " and " + secondRead);
|
||||
public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord unclippedFirstRead, final GATKSAMRecord unclippedSecondRead) {
|
||||
if ( unclippedFirstRead == null ) throw new IllegalArgumentException("unclippedFirstRead cannot be null");
|
||||
if ( unclippedSecondRead == null ) throw new IllegalArgumentException("unclippedSecondRead cannot be null");
|
||||
if ( ! unclippedFirstRead.getReadName().equals(unclippedSecondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + unclippedFirstRead + " and " + unclippedSecondRead);
|
||||
|
||||
if( unclippedFirstRead.getCigarString().contains("I") || unclippedFirstRead.getCigarString().contains("D") || unclippedSecondRead.getCigarString().contains("I") || unclippedSecondRead.getCigarString().contains("D") ) {
|
||||
return null; // fragments contain indels so don't merge them
|
||||
}
|
||||
|
||||
final GATKSAMRecord firstRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedFirstRead));
|
||||
final GATKSAMRecord secondRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedSecondRead));
|
||||
|
||||
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
|
||||
return null; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
|
||||
}
|
||||
if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
|
||||
return null; // fragments contain indels so don't merge them
|
||||
}
|
||||
|
||||
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
|
||||
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getAlignmentStart());
|
||||
|
||||
final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
|
||||
final int numBases = firstReadStop + secondRead.getReadLength();
|
||||
|
|
@ -264,7 +269,7 @@ public final class FragmentUtils {
|
|||
|
||||
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
|
||||
returnRead.setIsStrandless(true);
|
||||
returnRead.setAlignmentStart( firstRead.getSoftStart() );
|
||||
returnRead.setAlignmentStart( firstRead.getAlignmentStart() );
|
||||
returnRead.setReadBases( bases );
|
||||
returnRead.setBaseQualities( quals );
|
||||
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
return read;
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "MergeFragmentsTest")
|
||||
@Test(enabled = !DEBUG, dataProvider = "MergeFragmentsTest")
|
||||
public void testMergingTwoReads(final String name, final GATKSAMRecord read1, GATKSAMRecord read2, final GATKSAMRecord expectedMerged) {
|
||||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
|
||||
|
|
@ -240,4 +240,60 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClippingBeforeMerge() {
|
||||
final String common = Utils.dupString("A", 10);
|
||||
final byte[] commonQuals = Utils.dupBytes((byte)30, common.length());
|
||||
final String adapter = "NNNN";
|
||||
|
||||
final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, common, commonQuals, "", 30, 10);
|
||||
final GATKSAMRecord read2 = makeOverlappingRead("", 30, common, commonQuals, adapter, 30, 10);
|
||||
final GATKSAMRecord expectedMerged = makeOverlappingRead("", 30, common, commonQuals, "", 30, 10);
|
||||
read1.setCigarString("4S" + common.length() + "M");
|
||||
read1.setProperPairFlag(true);
|
||||
read1.setFirstOfPairFlag(true);
|
||||
read1.setReadNegativeStrandFlag(true);
|
||||
read1.setMateAlignmentStart(10);
|
||||
read2.setCigarString(common.length() + "M4S");
|
||||
read2.setProperPairFlag(true);
|
||||
read2.setFirstOfPairFlag(false);
|
||||
read2.setReadNegativeStrandFlag(false);
|
||||
|
||||
final int insertSize = common.length() - 1;
|
||||
read1.setInferredInsertSize(insertSize);
|
||||
read2.setInferredInsertSize(-insertSize);
|
||||
|
||||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString());
|
||||
Assert.assertEquals(actual.getReadBases(), expectedMerged.getReadBases());
|
||||
Assert.assertEquals(actual.getReadGroup(), expectedMerged.getReadGroup());
|
||||
Assert.assertEquals(actual.getMappingQuality(), expectedMerged.getMappingQuality());
|
||||
for ( final EventType type : EventType.values() )
|
||||
Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClippingBeforeMergeResultingInCompletelyContainedSecondRead() {
|
||||
final String adapter = "NNNN";
|
||||
|
||||
final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, Utils.dupString("A", 10), Utils.dupBytes((byte)30, 10), "", 30, 10);
|
||||
final GATKSAMRecord read2 = makeOverlappingRead("", 30, Utils.dupString("A", 7), Utils.dupBytes((byte)30, 7), adapter, 30, 10);
|
||||
read1.setCigarString("4S10M");
|
||||
read1.setProperPairFlag(true);
|
||||
read1.setFirstOfPairFlag(true);
|
||||
read1.setReadNegativeStrandFlag(true);
|
||||
read1.setMateAlignmentStart(10);
|
||||
read2.setCigarString("7M4S");
|
||||
read2.setProperPairFlag(true);
|
||||
read2.setFirstOfPairFlag(false);
|
||||
read2.setReadNegativeStrandFlag(false);
|
||||
|
||||
final int insertSize = 7 - 1;
|
||||
read1.setInferredInsertSize(insertSize);
|
||||
read2.setInferredInsertSize(-insertSize);
|
||||
|
||||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
Assert.assertNull(actual);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue