Merge pull request #347 from broadinstitute/eb_more_dnagling_tail_improvements

More specific fix for the dangling tail edge case with a single leading deletion.
This commit is contained in:
Ryan Poplin 2013-07-26 07:25:47 -07:00
commit f52196496d
2 changed files with 34 additions and 2 deletions

View File

@ -340,8 +340,15 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
return 0;
final int altIndexToMerge = Math.max(danglingTailMergeResult.cigar.getReadLength() - matchingSuffix - 1, 0);
// there is an important edge condition that we need to handle here: Smith-Waterman correctly calculates that there is a
// deletion, that deletion is left-aligned such that the LCA node is part of that deletion, and the rest of the dangling
// tail is a perfect match to the suffix of the reference path. In this case we need to push the reference index to merge
// down one position so that we don't incorrectly cut a base off of the deletion.
final boolean firstElementIsDeletion = elements.get(0).getOperator() == CigarOperator.D;
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (firstElementIsDeletion ? 1 : 0); // need to push down if SW tells us to remove the LCA
final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);
addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));
return 1;
}

View File

@ -47,22 +47,27 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.vcf.VCFCodec;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
public class HaplotypeCallerIntegrationTest extends WalkerTest {
final static String REF = b37KGReference;
final static String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
final static String NA12878_BAM = privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
@ -176,6 +181,26 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("HCTestDoesNotFailOnBadRefBase: ", spec);
}
@Test
public void HCTestDanglingTailMergingForDeletions() throws IOException {
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);
// confirm that the call is the correct one
final VCFCodec codec = new VCFCodec();
final FileInputStream s = new FileInputStream(outputVCF);
final AsciiLineReader lineReader = new AsciiLineReader(new PositionalBufferedStream(s));
codec.readHeader(lineReader);
final String line = lineReader.readLine();
Assert.assertFalse(line == null);
final VariantContext vc = codec.decode(line);
Assert.assertTrue(vc.isBiallelic());
Assert.assertTrue(vc.getReference().basesMatch("ATGTATG"));
Assert.assertTrue(vc.getAlternateAllele(0).basesMatch("A"));
}
// --------------------------------------------------------------------------------------------------------------
//
// testing reduced reads