More specific fix for the dangling tail edge case with a single leading deletion.
The previous fix was too general (and therefore incorrect) and caused the HC to exception out. Added "unit" test for this exact case.
This commit is contained in:
parent
e5aab22680
commit
0a2b5ddadf
|
|
@ -340,8 +340,15 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
return 0;
|
||||
|
||||
final int altIndexToMerge = Math.max(danglingTailMergeResult.cigar.getReadLength() - matchingSuffix - 1, 0);
|
||||
|
||||
// there is an important edge condition that we need to handle here: Smith-Waterman correctly calculates that there is a
|
||||
// deletion, that deletion is left-aligned such that the LCA node is part of that deletion, and the rest of the dangling
|
||||
// tail is a perfect match to the suffix of the reference path. In this case we need to push the reference index to merge
|
||||
// down one position so that we don't incorrectly cut a base off of the deletion.
|
||||
final boolean firstElementIsDeletion = elements.get(0).getOperator() == CigarOperator.D;
|
||||
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (firstElementIsDeletion ? 1 : 0); // need to push down if SW tells us to remove the LCA
|
||||
final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
|
||||
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);
|
||||
|
||||
addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,22 +47,27 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||
final static String REF = b37KGReference;
|
||||
final static String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_BAM = privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||
|
|
@ -176,6 +181,26 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
executeTest("HCTestDoesNotFailOnBadRefBase: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestDanglingTailMergingForDeletions() throws IOException {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||
final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);
|
||||
|
||||
// confirm that the call is the correct one
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
final FileInputStream s = new FileInputStream(outputVCF);
|
||||
final AsciiLineReader lineReader = new AsciiLineReader(new PositionalBufferedStream(s));
|
||||
codec.readHeader(lineReader);
|
||||
final String line = lineReader.readLine();
|
||||
Assert.assertFalse(line == null);
|
||||
final VariantContext vc = codec.decode(line);
|
||||
Assert.assertTrue(vc.isBiallelic());
|
||||
Assert.assertTrue(vc.getReference().basesMatch("ATGTATG"));
|
||||
Assert.assertTrue(vc.getAlternateAllele(0).basesMatch("A"));
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing reduced reads
|
||||
|
|
|
|||
Loading…
Reference in New Issue