Fix for edge case bug of trying to create insertions/deletions on the edge of contigs.

-- Added integration test using MT that previously failed
This commit is contained in:
Ryan Poplin 2013-03-15 12:31:54 -04:00
parent 0fd40dbde9
commit b8991f5e98
3 changed files with 37 additions and 27 deletions

View File

@ -430,7 +430,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
* @param refWithPadding the full reference byte array with padding which encompasses the active region
* @return a haplotype fully extended to encompass the active region
*/
@Requires({"haplotype != null", "activeRegionStart > 0", "refWithPadding != null", "refWithPadding.length > 0"})
@Requires({"haplotype != null", "activeRegionStart >= 0", "refWithPadding != null", "refWithPadding.length > 0"})
@Ensures({"result != null", "result.getCigar() != null"})
private Haplotype extendPartialHaplotype( final Haplotype haplotype, final int activeRegionStart, final byte[] refWithPadding ) {
final Cigar cigar = haplotype.getCigar();

View File

@ -710,24 +710,26 @@ public class GenotypingEngine {
switch( ce.getOperator() ) {
case I:
{
final List<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) ) {
insertionAlleles.add( Allele.create(refByte, true) );
}
if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
if( BaseUtils.isAllRegularBases(insertionBases) ) {
insertionAlleles.add( Allele.create(insertionBases, false) );
if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig
final List<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) ) {
insertionAlleles.add( Allele.create(refByte, true) );
}
if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
if( BaseUtils.isAllRegularBases(insertionBases) ) {
insertionAlleles.add( Allele.create(insertionBases, false) );
}
}
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
}
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
alignmentPos += elementLength;
break;
@ -739,14 +741,16 @@ public class GenotypingEngine {
}
case D:
{
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final List<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(refByte, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final List<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(refByte, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
}
}
refPos += elementLength;
break;

View File

@ -58,6 +58,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam";
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
private void HCTest(String bam, String args, String md5) {
@ -76,7 +77,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10");
}
@Test(enabled = false)
@Test(enabled = false) // can't annotate the rsID's yet
public void testHaplotypeCallerSingleSampleWithDbsnp() {
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "");
}
@ -98,6 +99,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53");
}
@Test
public void testHaplotypeCallerInsertionOnEdgeOfContig() {
HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "e6f7bbab7cf96cbb25837b7a94bf0f82");
}
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
// was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to
// map call. So the test is there for consistency but not for correctness. I'm not sure we can trust