Improvements to the reference model pipeline.
-- We use the RegenotypeVariants walker to recompute the qual field. (instead of the discussed idea of adding this functionality to CombineVariants) -- QualByDepth will now be recomputed even if the stratified contexts are missing. This greatly improves the QD estimate for this pipeline. Doesn't work for multi-allelics since the qual can't be recomputed.
This commit is contained in:
parent
cafcb34855
commit
b22c9c2cb4
|
|
@ -54,6 +54,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.coverage.DepthOfCoverage;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
|
|
@ -94,19 +96,20 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
if ( !genotype.isHet() && !genotype.isHomVar() )
|
||||
continue;
|
||||
|
||||
if (stratifiedContexts!= null) {
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if (stratifiedContexts!= null && !stratifiedContexts.isEmpty()) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
depth += context.getBasePileup().depthOfCoverage();
|
||||
|
||||
}
|
||||
else if (perReadAlleleLikelihoodMap != null) {
|
||||
PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
} else if (perReadAlleleLikelihoodMap != null) {
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty())
|
||||
continue;
|
||||
|
||||
depth += perReadAlleleLikelihoods.getNumberOfStoredElements();
|
||||
} else if (genotype.hasDP() && vc.isBiallelic()) { // TODO -- this currently only works with biallelic variants for now because multiallelics have had their PLs stripped out and therefore their qual score can't be recomputed
|
||||
depth += genotype.getDP();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -116,7 +119,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
final double altAlleleLength = GATKVariantContextUtils.getMeanAltAlleleLength(vc);
|
||||
double QD = -10.0 * vc.getLog10PError() / ((double)depth * altAlleleLength);
|
||||
QD = fixTooHighQD(QD);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
||||
return map;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
|||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
// either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null
|
||||
// either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if (genotypes == null || genotypes.size() == 0)
|
||||
|
|
|
|||
|
|
@ -345,4 +345,52 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
Assert.assertFalse(lineIterator.hasNext());
|
||||
Assert.assertFalse(lineIteratorAnn.hasNext());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQualByDepth() throws IOException {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||
final File outputVCF = executeTest("testQualByDepth", spec).getFirst().get(0);
|
||||
|
||||
final String baseNoQD = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA QualByDepth";
|
||||
final WalkerTestSpec specNoQD = new WalkerTestSpec(baseNoQD, 1, Arrays.asList(""));
|
||||
specNoQD.disableShadowBCF();
|
||||
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
|
||||
|
||||
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("139a4384f5a7c1f49ada67f416642249"));
|
||||
specAnn.disableShadowBCF();
|
||||
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);
|
||||
|
||||
// confirm that the QD values are present in the new file for all biallelic variants
|
||||
// QD values won't be identical because some filtered reads are missing during re-annotation
|
||||
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
final FileInputStream s = new FileInputStream(outputVCF);
|
||||
final LineIterator lineIterator = codec.makeSourceFromStream(new PositionalBufferedStream(s));
|
||||
codec.readHeader(lineIterator);
|
||||
|
||||
final VCFCodec codecAnn = new VCFCodec();
|
||||
final FileInputStream sAnn = new FileInputStream(outputVCFAnn);
|
||||
final LineIterator lineIteratorAnn = codecAnn.makeSourceFromStream(new PositionalBufferedStream(sAnn));
|
||||
codecAnn.readHeader(lineIteratorAnn);
|
||||
|
||||
while( lineIterator.hasNext() && lineIteratorAnn.hasNext() ) {
|
||||
final String line = lineIterator.next();
|
||||
Assert.assertFalse(line == null);
|
||||
final VariantContext vc = codec.decode(line);
|
||||
|
||||
final String lineAnn = lineIteratorAnn.next();
|
||||
Assert.assertFalse(lineAnn == null);
|
||||
final VariantContext vcAnn = codecAnn.decode(lineAnn);
|
||||
|
||||
if( vc.isBiallelic() ) {
|
||||
Assert.assertTrue(vc.hasAttribute("QD"));
|
||||
Assert.assertTrue(vcAnn.hasAttribute("QD"));
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertFalse(lineIterator.hasNext());
|
||||
Assert.assertFalse(lineIteratorAnn.hasNext());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -235,8 +235,6 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void validateAnnotateUnionArguments() {
|
||||
Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue