Bug fix in assembly for edge case in which the extendPartialHaplotype function was filling in deletions in the middle of haplotypes.

This commit is contained in:
Ryan Poplin 2013-03-13 15:51:01 -04:00
parent 9d6d1f94b0
commit 0cf5d30dac
4 changed files with 22 additions and 38 deletions

View File

@ -438,7 +438,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
byte[] newHaplotypeBases = haplotype.getBases(); byte[] newHaplotypeBases = haplotype.getBases();
int refPos = activeRegionStart; int refPos = activeRegionStart;
int hapPos = 0; int hapPos = 0;
for( CigarElement ce : cigar.getCigarElements() ) { for( int iii = 0; iii < cigar.getCigarElements().size(); iii++ ) {
final CigarElement ce = cigar.getCigarElement(iii);
switch (ce.getOperator()) { switch (ce.getOperator()) {
case M: case M:
refPos += ce.getLength(); refPos += ce.getLength();
@ -450,16 +451,17 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
newCigar.add(ce); newCigar.add(ce);
break; break;
case D: case D:
refPos += ce.getLength(); if( iii == 0 || iii == cigar.getCigarElements().size() - 1 ) {
newCigar.add(ce);
break;
case X:
newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos), newHaplotypeBases = ArrayUtils.addAll( Arrays.copyOfRange(newHaplotypeBases, 0, hapPos),
ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()), ArrayUtils.addAll(Arrays.copyOfRange(refWithPadding, refPos, refPos + ce.getLength()),
Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length))); Arrays.copyOfRange(newHaplotypeBases, hapPos, newHaplotypeBases.length)));
refPos += ce.getLength();
hapPos += ce.getLength(); hapPos += ce.getLength();
refPos += ce.getLength();
newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M)); newCigar.add(new CigarElement(ce.getLength(), CigarOperator.M));
} else {
refPos += ce.getLength();
newCigar.add(ce);
}
break; break;
default: default:
throw new IllegalStateException("Unsupported cigar operator detected: " + ce.getOperator()); throw new IllegalStateException("Unsupported cigar operator detected: " + ce.getOperator());

View File

@ -310,31 +310,13 @@ public class KBestPaths {
if( swCigar.numCigarElements() > 6 ) { // this bubble is too divergent from the reference if( swCigar.numCigarElements() > 6 ) { // this bubble is too divergent from the reference
returnCigar.add(new CigarElement(1, CigarOperator.N)); returnCigar.add(new CigarElement(1, CigarOperator.N));
} else { } else {
int skipElement = -1;
if( fromVertex == null ) {
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
final CigarElement ce = swCigar.getCigarElement(iii);
if( ce.getOperator().equals(CigarOperator.D) ) {
skipElement = iii;
break;
}
}
} else if (toVertex == null ) {
for( int iii = swCigar.numCigarElements() - 1; iii >= 0; iii-- ) {
final CigarElement ce = swCigar.getCigarElement(iii);
if( ce.getOperator().equals(CigarOperator.D) ) {
skipElement = iii;
break;
}
}
}
for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) { for( int iii = 0; iii < swCigar.numCigarElements(); iii++ ) {
// now we need to remove the padding from the cigar string // now we need to remove the padding from the cigar string
int length = swCigar.getCigarElement(iii).getLength(); int length = swCigar.getCigarElement(iii).getLength();
if( iii == 0 ) { length -= padding.length; } if( iii == 0 ) { length -= padding.length; }
if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; } if( iii == swCigar.numCigarElements() - 1 ) { length -= padding.length; }
if( length > 0 ) { if( length > 0 ) {
returnCigar.add(new CigarElement(length, (skipElement == iii ? CigarOperator.X : swCigar.getCigarElement(iii).getOperator()))); returnCigar.add(new CigarElement(length, swCigar.getCigarElement(iii).getOperator()));
} }
} }
if( (refBytes == null && returnCigar.getReferenceLength() != 0) || ( refBytes != null && returnCigar.getReferenceLength() != refBytes.length ) ) { if( (refBytes == null && returnCigar.getReferenceLength() != 0) || ( refBytes != null && returnCigar.getReferenceLength() != refBytes.length ) ) {

View File

@ -87,12 +87,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleGGAComplex() { public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"417174e043dbb8b86cc3871da9b50536"); "fd3412030628fccf77effdb1ec03dce7");
} }
@Test @Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"9563e3c1eee2ef46afc7822af0bb58a8"); "633e8930a263e34def5e097889dd9805");
} }
} }

View File

@ -69,12 +69,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSample() { public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "4a2880f0753e6e813b9e0c35209b3708"); HCTest(CEUTRIO_BAM, "", "694d6ea7f0f305854d4108379d68de75");
} }
@Test @Test
public void testHaplotypeCallerSingleSample() { public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "b3bffabb7aafd43e0339958395e6aa10"); HCTest(NA12878_BAM, "", "995501d8af646af3b6eaa4109e2fb4a0");
} }
@Test(enabled = false) // can't annotate the rsID's yet @Test(enabled = false) // can't annotate the rsID's yet
@ -85,7 +85,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSampleGGA() { public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"fa1b92373c89d2238542a319ad25c257"); "627124af27dc4556d83df1a04e4b9f97");
} }
private void HCTestIndelQualityScores(String bam, String args, String md5) { private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() { public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "852623c93feef5e62fcb555beedc8c53"); HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "205fc8647b908c0dab7b5c6d6b78c0c2");
} }
@Test @Test
@ -118,7 +118,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void HCTestStructuralIndels() { public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("9296f1af6cf1f1cc4b79494eb366e976")); final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f1250a8ecd404443dcca20741a74ec4f"));
executeTest("HCTestStructuralIndels: ", spec); executeTest("HCTestStructuralIndels: ", spec);
} }
@ -148,7 +148,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() { public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("addceb63f5bfa9f11e15335d5bf641e9")); Arrays.asList("d3eb900eecdafafda3170f67adff42ae"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
} }
} }