Merge pull request #347 from broadinstitute/eb_more_dnagling_tail_improvements
More specific fix for the dangling tail edge case with a single leading deletion.
This commit is contained in:
commit
f52196496d
|
|
@ -340,8 +340,15 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
return 0;
|
||||
|
||||
final int altIndexToMerge = Math.max(danglingTailMergeResult.cigar.getReadLength() - matchingSuffix - 1, 0);
|
||||
|
||||
// there is an important edge condition that we need to handle here: Smith-Waterman correctly calculates that there is a
|
||||
// deletion, that deletion is left-aligned such that the LCA node is part of that deletion, and the rest of the dangling
|
||||
// tail is a perfect match to the suffix of the reference path. In this case we need to push the reference index to merge
|
||||
// down one position so that we don't incorrectly cut a base off of the deletion.
|
||||
final boolean firstElementIsDeletion = elements.get(0).getOperator() == CigarOperator.D;
|
||||
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (firstElementIsDeletion ? 1 : 0); // need to push down if SW tells us to remove the LCA
|
||||
final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
|
||||
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);
|
||||
|
||||
addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,22 +47,27 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||
final static String REF = b37KGReference;
|
||||
final static String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_BAM = privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||
|
|
@ -176,6 +181,26 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
executeTest("HCTestDoesNotFailOnBadRefBase: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestDanglingTailMergingForDeletions() throws IOException {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||
final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);
|
||||
|
||||
// confirm that the call is the correct one
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
final FileInputStream s = new FileInputStream(outputVCF);
|
||||
final AsciiLineReader lineReader = new AsciiLineReader(new PositionalBufferedStream(s));
|
||||
codec.readHeader(lineReader);
|
||||
final String line = lineReader.readLine();
|
||||
Assert.assertFalse(line == null);
|
||||
final VariantContext vc = codec.decode(line);
|
||||
Assert.assertTrue(vc.isBiallelic());
|
||||
Assert.assertTrue(vc.getReference().basesMatch("ATGTATG"));
|
||||
Assert.assertTrue(vc.getAlternateAllele(0).basesMatch("A"));
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing reduced reads
|
||||
|
|
|
|||
Loading…
Reference in New Issue