Fix for edge case bug of trying to create insertions/deletions on the edge of contigs.
-- Added integration test using MT that previously failed
This commit is contained in:
parent
0fd40dbde9
commit
b8991f5e98
|
|
@ -430,7 +430,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
|||
* @param refWithPadding the full reference byte array with padding which encompasses the active region
|
||||
* @return a haplotype fully extended to encompass the active region
|
||||
*/
|
||||
@Requires({"haplotype != null", "activeRegionStart > 0", "refWithPadding != null", "refWithPadding.length > 0"})
|
||||
@Requires({"haplotype != null", "activeRegionStart >= 0", "refWithPadding != null", "refWithPadding.length > 0"})
|
||||
@Ensures({"result != null", "result.getCigar() != null"})
|
||||
private Haplotype extendPartialHaplotype( final Haplotype haplotype, final int activeRegionStart, final byte[] refWithPadding ) {
|
||||
final Cigar cigar = haplotype.getCigar();
|
||||
|
|
|
|||
|
|
@ -710,24 +710,26 @@ public class GenotypingEngine {
|
|||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
{
|
||||
final List<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||
final byte refByte = ref[refPos-1];
|
||||
if( BaseUtils.isRegularBase(refByte) ) {
|
||||
insertionAlleles.add( Allele.create(refByte, true) );
|
||||
}
|
||||
if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
} else {
|
||||
byte[] insertionBases = new byte[]{};
|
||||
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
||||
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
||||
if( BaseUtils.isAllRegularBases(insertionBases) ) {
|
||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||
if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig
|
||||
final List<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||
final byte refByte = ref[refPos-1];
|
||||
if( BaseUtils.isRegularBase(refByte) ) {
|
||||
insertionAlleles.add( Allele.create(refByte, true) );
|
||||
}
|
||||
if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
} else {
|
||||
byte[] insertionBases = new byte[]{};
|
||||
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
||||
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
||||
if( BaseUtils.isAllRegularBases(insertionBases) ) {
|
||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||
}
|
||||
}
|
||||
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
}
|
||||
}
|
||||
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
}
|
||||
alignmentPos += elementLength;
|
||||
break;
|
||||
|
|
@ -739,14 +741,16 @@ public class GenotypingEngine {
|
|||
}
|
||||
case D:
|
||||
{
|
||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||
final List<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||
final byte refByte = ref[refPos-1];
|
||||
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
|
||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||
deletionAlleles.add( Allele.create(refByte, false) );
|
||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||
if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig
|
||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||
final List<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||
final byte refByte = ref[refPos-1];
|
||||
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
|
||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||
deletionAlleles.add( Allele.create(refByte, false) );
|
||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||
}
|
||||
}
|
||||
refPos += elementLength;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||
final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam";
|
||||
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||
|
||||
private void HCTest(String bam, String args, String md5) {
|
||||
|
|
@ -76,7 +77,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = false) // can't annotate the rsID's yet
|
||||
public void testHaplotypeCallerSingleSampleWithDbsnp() {
|
||||
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "");
|
||||
}
|
||||
|
|
@ -98,6 +99,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerInsertionOnEdgeOfContig() {
|
||||
HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "e6f7bbab7cf96cbb25837b7a94bf0f82");
|
||||
}
|
||||
|
||||
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
// was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to
|
||||
// map call. So the test is there for consistency but not for correctness. I'm not sure we can trust
|
||||
|
|
|
|||
Loading…
Reference in New Issue