Improvements to the reference model pipeline.

-- We use the RegenotypeVariants walker to recompute the qual field. (instead of the discussed idea of adding this functionality to CombineVariants)
-- QualByDepth will now be recomputed even if the stratified contexts are missing. This greatly improves the QD estimate for this pipeline. Doesn't work for multi-allelics since the qual can't be recomputed.
This commit is contained in:
Ryan Poplin 2013-09-30 10:17:43 -04:00
parent cafcb34855
commit b22c9c2cb4
4 changed files with 58 additions and 9 deletions

View File

@ -54,6 +54,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.coverage.DepthOfCoverage;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
@ -94,19 +96,20 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if ( !genotype.isHet() && !genotype.isHomVar() )
continue;
if (stratifiedContexts!= null) {
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if (stratifiedContexts!= null && !stratifiedContexts.isEmpty()) {
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null )
continue;
depth += context.getBasePileup().depthOfCoverage();
}
else if (perReadAlleleLikelihoodMap != null) {
PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName());
} else if (perReadAlleleLikelihoodMap != null) {
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName());
if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty())
continue;
depth += perReadAlleleLikelihoods.getNumberOfStoredElements();
} else if (genotype.hasDP() && vc.isBiallelic()) { // TODO -- this currently only works with biallelic variants for now because multiallelics have had their PLs stripped out and therefore their qual score can't be recomputed
depth += genotype.getDP();
}
}
@ -116,7 +119,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
final double altAlleleLength = GATKVariantContextUtils.getMeanAltAlleleLength(vc);
double QD = -10.0 * vc.getLog10PError() / ((double)depth * altAlleleLength);
QD = fixTooHighQD(QD);
Map<String, Object> map = new HashMap<String, Object>();
Map<String, Object> map = new HashMap<>();
map.put(getKeyNames().get(0), String.format("%.2f", QD));
return map;
}

View File

@ -83,7 +83,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
// either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null
// either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null
final GenotypesContext genotypes = vc.getGenotypes();
if (genotypes == null || genotypes.size() == 0)

View File

@ -345,4 +345,52 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
Assert.assertFalse(lineIterator.hasNext());
Assert.assertFalse(lineIteratorAnn.hasNext());
}
@Test
public void testQualByDepth() throws IOException {
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
final File outputVCF = executeTest("testQualByDepth", spec).getFirst().get(0);
final String baseNoQD = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA QualByDepth";
final WalkerTestSpec specNoQD = new WalkerTestSpec(baseNoQD, 1, Arrays.asList(""));
specNoQD.disableShadowBCF();
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("139a4384f5a7c1f49ada67f416642249"));
specAnn.disableShadowBCF();
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);
// confirm that the QD values are present in the new file for all biallelic variants
// QD values won't be identical because some filtered reads are missing during re-annotation
final VCFCodec codec = new VCFCodec();
final FileInputStream s = new FileInputStream(outputVCF);
final LineIterator lineIterator = codec.makeSourceFromStream(new PositionalBufferedStream(s));
codec.readHeader(lineIterator);
final VCFCodec codecAnn = new VCFCodec();
final FileInputStream sAnn = new FileInputStream(outputVCFAnn);
final LineIterator lineIteratorAnn = codecAnn.makeSourceFromStream(new PositionalBufferedStream(sAnn));
codecAnn.readHeader(lineIteratorAnn);
while( lineIterator.hasNext() && lineIteratorAnn.hasNext() ) {
final String line = lineIterator.next();
Assert.assertFalse(line == null);
final VariantContext vc = codec.decode(line);
final String lineAnn = lineIteratorAnn.next();
Assert.assertFalse(lineAnn == null);
final VariantContext vcAnn = codecAnn.decode(lineAnn);
if( vc.isBiallelic() ) {
Assert.assertTrue(vc.hasAttribute("QD"));
Assert.assertTrue(vcAnn.hasAttribute("QD"));
}
}
Assert.assertFalse(lineIterator.hasNext());
Assert.assertFalse(lineIteratorAnn.hasNext());
}
}

View File

@ -235,8 +235,6 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
vcfWriter.writeHeader(vcfHeader);
}
private void validateAnnotateUnionArguments() {
Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);