Bug fix in assembly for edge case in which the extendPartialHaplotype function was filling in deletions in the middle of haplotypes.

This commit is contained in:
Ryan Poplin 2013-03-13 15:51:01 -04:00
parent 9d6d1f94b0
commit 0cf5d30dac
4 changed files with 22 additions and 38 deletions

View File

@ -438,7 +438,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
byte[] newHaplotypeBases = haplotype.getBases();
int refPos = activeRegionStart;
int hapPos = 0;
for( CigarElement ce : cigar.getCigarElements() ) {
for( int iii = 0; iii < cigar.getCigarElements().size(); iii++ ) {
final CigarElement ce = cigar.getCigarElement(iii);
switch (ce.getOperator()) {
case M:
refPos += ce.getLength();
@ -450,16 +451,17 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
newCigar.add(ce);
break;
case D:
refPos += ce.getLength();
newCigar.add(ce);
break;
case X:
newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos),
ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()),
Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length)));
refPos += ce.getLength();
hapPos += ce.getLength();
newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M));
if( iii == 0 || iii == cigar.getCigarElements().size() - 1 ) {
newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos),
ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()),
Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length)));
hapPos += ce.getLength();
refPos += ce.getLength();
newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M));
} else {
refPos += ce.getLength();
newCigar.add(ce);
}
break;
default:
throw new IllegalStateException("Unsupported cigar operator detected: " + ce.getOperator());

View File

@ -310,31 +310,13 @@ public class KBestPaths {
if( swCigar.numCigarElements() > 6 ) { // this bubble is too divergent from the reference
returnCigar.add(new CigarElement(1, CigarOperator.N));
} else {
int skipElement = -1;
if( fromVertex == null ) {
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
final CigarElement ce = swCigar.getCigarElement(iii);
if( ce.getOperator().equals(CigarOperator.D) ) {
skipElement = iii;
break;
}
}
} else if (toVertex == null ) {
for( int iii = swCigar.numCigarElements() - 1; iii >= 0; iii-- ) {
final CigarElement ce = swCigar.getCigarElement(iii);
if( ce.getOperator().equals(CigarOperator.D) ) {
skipElement = iii;
break;
}
}
}
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
// now we need to remove the padding from the cigar string
int length = swCigar.getCigarElement(iii).getLength();
if( iii == 0 ) { length -= padding.length; }
if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; }
if( length > 0 ) {
returnCigar.add(new CigarElement(length, (skipElement == iii ? CigarOperator.X : swCigar.getCigarElement(iii).getOperator())));
returnCigar.add(new CigarElement(length, swCigar.getCigarElement(iii).getOperator()));
}
}
if( (refBytes == null && returnCigar.getReferenceLength() != 0) || ( refBytes != null && returnCigar.getReferenceLength() != refBytes.length ) ) {

View File

@ -87,12 +87,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"417174e043dbb8b86cc3871da9b50536");
"fd3412030628fccf77effdb1ec03dce7");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"9563e3c1eee2ef46afc7822af0bb58a8");
"633e8930a263e34def5e097889dd9805");
}
}

View File

@ -69,12 +69,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "4a2880f0753e6e813b9e0c35209b3708");
HCTest(CEUTRIO_BAM, "", "694d6ea7f0f305854d4108379d68de75");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10");
HCTest(NA12878_BAM, "", "995501d8af646af3b6eaa4109e2fb4a0");
}
@Test(enabled = false) // can't annotate the rsID's yet
@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"fa1b92373c89d2238542a319ad25c257");
"627124af27dc4556d83df1a04e4b9f97");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "205fc8647b908c0dab7b5c6d6b78c0c2");
}
@Test
@ -118,7 +118,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("9296f1af6cf1f1cc4b79494eb366e976"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f1250a8ecd404443dcca20741a74ec4f"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -148,7 +148,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("addceb63f5bfa9f11e15335d5bf641e9"));
Arrays.asList("d3eb900eecdafafda3170f67adff42ae"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
}
}