Merge pull request #189 from broadinstitute/md_hc_clip_before_merging
Bugfix for FragmentUtils.mergeOverlappingPairedFragments
This commit is contained in:
commit
3c7db87527
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd51f8c7235eb6547b678093c7a01089");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "27db36467d40c3cde201f5826e959d78");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "c1530f2158cb41d50e830ca5be0f97a0");
|
||||
HCTest(NA12878_BAM, "", "18d5671d8454e8a0c05ee5f6e9fabfe3");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
|
|
@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("eb5772b825120a0b8710e5add485d73a"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cb190c935541ebb9f660f713a882b922"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -213,24 +214,28 @@ public final class FragmentUtils {
|
|||
*
|
||||
* Assumes that firstRead starts before secondRead (according to their soft clipped starts)
|
||||
*
|
||||
* @param firstRead the left most read
|
||||
* @param firstRead the right most read
|
||||
* @param unclippedFirstRead the left most read
|
||||
* @param unclippedSecondRead the right most read
|
||||
*
|
||||
* @return a strandless merged read of first and second, or null if the algorithm cannot create a meaningful one
|
||||
*/
|
||||
public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord firstRead, final GATKSAMRecord secondRead) {
|
||||
if ( firstRead == null ) throw new IllegalArgumentException("firstRead cannot be null");
|
||||
if ( secondRead == null ) throw new IllegalArgumentException("secondRead cannot be null");
|
||||
if ( ! firstRead.getReadName().equals(secondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + firstRead + " and " + secondRead);
|
||||
public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord unclippedFirstRead, final GATKSAMRecord unclippedSecondRead) {
|
||||
if ( unclippedFirstRead == null ) throw new IllegalArgumentException("unclippedFirstRead cannot be null");
|
||||
if ( unclippedSecondRead == null ) throw new IllegalArgumentException("unclippedSecondRead cannot be null");
|
||||
if ( ! unclippedFirstRead.getReadName().equals(unclippedSecondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + unclippedFirstRead + " and " + unclippedSecondRead);
|
||||
|
||||
if( unclippedFirstRead.getCigarString().contains("I") || unclippedFirstRead.getCigarString().contains("D") || unclippedSecondRead.getCigarString().contains("I") || unclippedSecondRead.getCigarString().contains("D") ) {
|
||||
return null; // fragments contain indels so don't merge them
|
||||
}
|
||||
|
||||
final GATKSAMRecord firstRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedFirstRead));
|
||||
final GATKSAMRecord secondRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedSecondRead));
|
||||
|
||||
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
|
||||
return null; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
|
||||
}
|
||||
if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
|
||||
return null; // fragments contain indels so don't merge them
|
||||
}
|
||||
|
||||
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
|
||||
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getAlignmentStart());
|
||||
|
||||
final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
|
||||
final int numBases = firstReadStop + secondRead.getReadLength();
|
||||
|
|
@ -264,7 +269,7 @@ public final class FragmentUtils {
|
|||
|
||||
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
|
||||
returnRead.setIsStrandless(true);
|
||||
returnRead.setAlignmentStart( firstRead.getSoftStart() );
|
||||
returnRead.setAlignmentStart( firstRead.getAlignmentStart() );
|
||||
returnRead.setReadBases( bases );
|
||||
returnRead.setBaseQualities( quals );
|
||||
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
return read;
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "MergeFragmentsTest")
|
||||
@Test(enabled = !DEBUG, dataProvider = "MergeFragmentsTest")
|
||||
public void testMergingTwoReads(final String name, final GATKSAMRecord read1, GATKSAMRecord read2, final GATKSAMRecord expectedMerged) {
|
||||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
|
||||
|
|
@ -240,4 +240,60 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testHardClippingBeforeMerge() {
|
||||
final String common = Utils.dupString("A", 10);
|
||||
final byte[] commonQuals = Utils.dupBytes((byte)30, common.length());
|
||||
final String adapter = "NNNN";
|
||||
|
||||
final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, common, commonQuals, "", 30, 10);
|
||||
final GATKSAMRecord read2 = makeOverlappingRead("", 30, common, commonQuals, adapter, 30, 10);
|
||||
final GATKSAMRecord expectedMerged = makeOverlappingRead("", 30, common, commonQuals, "", 30, 10);
|
||||
read1.setCigarString("4S" + common.length() + "M");
|
||||
read1.setProperPairFlag(true);
|
||||
read1.setFirstOfPairFlag(true);
|
||||
read1.setReadNegativeStrandFlag(true);
|
||||
read1.setMateAlignmentStart(10);
|
||||
read2.setCigarString(common.length() + "M4S");
|
||||
read2.setProperPairFlag(true);
|
||||
read2.setFirstOfPairFlag(false);
|
||||
read2.setReadNegativeStrandFlag(false);
|
||||
|
||||
final int insertSize = common.length() - 1;
|
||||
read1.setInferredInsertSize(insertSize);
|
||||
read2.setInferredInsertSize(-insertSize);
|
||||
|
||||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString());
|
||||
Assert.assertEquals(actual.getReadBases(), expectedMerged.getReadBases());
|
||||
Assert.assertEquals(actual.getReadGroup(), expectedMerged.getReadGroup());
|
||||
Assert.assertEquals(actual.getMappingQuality(), expectedMerged.getMappingQuality());
|
||||
for ( final EventType type : EventType.values() )
|
||||
Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClippingBeforeMergeResultingInCompletelyContainedSecondRead() {
|
||||
final String adapter = "NNNN";
|
||||
|
||||
final GATKSAMRecord read1 = makeOverlappingRead(adapter, 30, Utils.dupString("A", 10), Utils.dupBytes((byte)30, 10), "", 30, 10);
|
||||
final GATKSAMRecord read2 = makeOverlappingRead("", 30, Utils.dupString("A", 7), Utils.dupBytes((byte)30, 7), adapter, 30, 10);
|
||||
read1.setCigarString("4S10M");
|
||||
read1.setProperPairFlag(true);
|
||||
read1.setFirstOfPairFlag(true);
|
||||
read1.setReadNegativeStrandFlag(true);
|
||||
read1.setMateAlignmentStart(10);
|
||||
read2.setCigarString("7M4S");
|
||||
read2.setProperPairFlag(true);
|
||||
read2.setFirstOfPairFlag(false);
|
||||
read2.setReadNegativeStrandFlag(false);
|
||||
|
||||
final int insertSize = 7 - 1;
|
||||
read1.setInferredInsertSize(insertSize);
|
||||
read2.setInferredInsertSize(-insertSize);
|
||||
|
||||
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||
Assert.assertNull(actual);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue