Fix edge-case bug when trying to create insertions/deletions on the edge of contigs.
-- Added an integration test using MT that previously failed
This commit is contained in:
parent
0fd40dbde9
commit
b8991f5e98
|
|
@ -430,7 +430,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
* @param refWithPadding the full reference byte array with padding which encompasses the active region
|
* @param refWithPadding the full reference byte array with padding which encompasses the active region
|
||||||
* @return a haplotype fully extended to encompass the active region
|
* @return a haplotype fully extended to encompass the active region
|
||||||
*/
|
*/
|
||||||
@Requires({"haplotype != null", "activeRegionStart > 0", "refWithPadding != null", "refWithPadding.length > 0"})
|
@Requires({"haplotype != null", "activeRegionStart >= 0", "refWithPadding != null", "refWithPadding.length > 0"})
|
||||||
@Ensures({"result != null", "result.getCigar() != null"})
|
@Ensures({"result != null", "result.getCigar() != null"})
|
||||||
private Haplotype extendPartialHaplotype( final Haplotype haplotype, final int activeRegionStart, final byte[] refWithPadding ) {
|
private Haplotype extendPartialHaplotype( final Haplotype haplotype, final int activeRegionStart, final byte[] refWithPadding ) {
|
||||||
final Cigar cigar = haplotype.getCigar();
|
final Cigar cigar = haplotype.getCigar();
|
||||||
|
|
|
||||||
|
|
@ -710,24 +710,26 @@ public class GenotypingEngine {
|
||||||
switch( ce.getOperator() ) {
|
switch( ce.getOperator() ) {
|
||||||
case I:
|
case I:
|
||||||
{
|
{
|
||||||
final List<Allele> insertionAlleles = new ArrayList<Allele>();
|
if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig
|
||||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
final List<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||||
final byte refByte = ref[refPos-1];
|
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||||
if( BaseUtils.isRegularBase(refByte) ) {
|
final byte refByte = ref[refPos-1];
|
||||||
insertionAlleles.add( Allele.create(refByte, true) );
|
if( BaseUtils.isRegularBase(refByte) ) {
|
||||||
}
|
insertionAlleles.add( Allele.create(refByte, true) );
|
||||||
if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele
|
}
|
||||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
if( cigarIndex == 0 || cigarIndex == cigar.getCigarElements().size() - 1 ) { // if the insertion isn't completely resolved in the haplotype then make it a symbolic allele
|
||||||
} else {
|
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||||
byte[] insertionBases = new byte[]{};
|
} else {
|
||||||
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
byte[] insertionBases = new byte[]{};
|
||||||
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
||||||
if( BaseUtils.isAllRegularBases(insertionBases) ) {
|
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
||||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
if( BaseUtils.isAllRegularBases(insertionBases) ) {
|
||||||
|
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
|
||||||
|
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
|
|
||||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
|
||||||
}
|
}
|
||||||
alignmentPos += elementLength;
|
alignmentPos += elementLength;
|
||||||
break;
|
break;
|
||||||
|
|
@ -739,14 +741,16 @@ public class GenotypingEngine {
|
||||||
}
|
}
|
||||||
case D:
|
case D:
|
||||||
{
|
{
|
||||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig
|
||||||
final List<Allele> deletionAlleles = new ArrayList<Allele>();
|
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
final List<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||||
final byte refByte = ref[refPos-1];
|
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||||
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
|
final byte refByte = ref[refPos-1];
|
||||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
|
||||||
deletionAlleles.add( Allele.create(refByte, false) );
|
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
deletionAlleles.add( Allele.create(refByte, false) );
|
||||||
|
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
refPos += elementLength;
|
refPos += elementLength;
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||||
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||||
|
final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam";
|
||||||
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||||
|
|
||||||
private void HCTest(String bam, String args, String md5) {
|
private void HCTest(String bam, String args, String md5) {
|
||||||
|
|
@ -76,7 +77,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10");
|
HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = false) // can't annotate the rsID's yet
|
||||||
public void testHaplotypeCallerSingleSampleWithDbsnp() {
|
public void testHaplotypeCallerSingleSampleWithDbsnp() {
|
||||||
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "");
|
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "");
|
||||||
}
|
}
|
||||||
|
|
@ -98,6 +99,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53");
|
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaplotypeCallerInsertionOnEdgeOfContig() {
|
||||||
|
HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "e6f7bbab7cf96cbb25837b7a94bf0f82");
|
||||||
|
}
|
||||||
|
|
||||||
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||||
// was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to
|
// was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to
|
||||||
// map call. So the test is there for consistency but not for correctness. I'm not sure we can trust
|
// map call. So the test is there for consistency but not for correctness. I'm not sure we can trust
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue