Merge pull request #1490 from broadinstitute/rhl_ggvcf_hom_ref_high_mq

Remove RankSumTest and RMSAnnotation from hom-ref sites
This commit is contained in:
Ron Levine 2016-10-14 14:41:49 -04:00 committed by GitHub
commit 0d2a3e754c
3 changed files with 72 additions and 14 deletions

View File

@ -64,9 +64,12 @@ import org.broadinstitute.gatk.engine.walkers.Reference;
import org.broadinstitute.gatk.engine.walkers.RodWalker; import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.engine.walkers.TreeReducible; import org.broadinstitute.gatk.engine.walkers.TreeReducible;
import org.broadinstitute.gatk.engine.walkers.Window; import org.broadinstitute.gatk.engine.walkers.Window;
import org.broadinstitute.gatk.tools.walkers.annotator.RankSumTest;
import org.broadinstitute.gatk.tools.walkers.annotator.RMSAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AS_StandardAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AS_StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode; import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedArgumentCollection;
@ -188,6 +191,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
private UnifiedGenotypingEngine genotypingEngine; private UnifiedGenotypingEngine genotypingEngine;
// the annotation engine // the annotation engine
private VariantAnnotatorEngine annotationEngine; private VariantAnnotatorEngine annotationEngine;
// the INFO field annotation key names to remove
private final List<String> infoFieldAnnotationKeyNamesToRemove = new ArrayList<>();
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); } public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; } public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
@ -223,6 +228,16 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
annotationEngine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, Collections.<String>emptyList(), this, toolkit); annotationEngine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, Collections.<String>emptyList(), this, toolkit);
// Request INFO field annotations inheriting from RankSumTest and RMSAnnotation added to remove list
for ( final InfoFieldAnnotation annotation : annotationEngine.getRequestedInfoAnnotations() ) {
if ( annotation instanceof RankSumTest || annotation instanceof RMSAnnotation ) {
final List<String> keyNames = annotation.getKeyNames();
if ( !keyNames.isEmpty() ) {
infoFieldAnnotationKeyNamesToRemove.add(keyNames.get(0));
}
}
}
// create the genotyping engine // create the genotyping engine
// when checking for presence of AS_StandardAnnotation we must deal with annoying feature that // when checking for presence of AS_StandardAnnotation we must deal with annoying feature that
// the class name with or without the trailing "Annotation" are both valid command lines // the class name with or without the trailing "Annotation" are both valid command lines
@ -321,7 +336,6 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
//do trimming after allele-specific annotation reduction or the mapping is difficult //do trimming after allele-specific annotation reduction or the mapping is difficult
result = GATKVariantContextUtils.reverseTrimAlleles(result); result = GATKVariantContextUtils.reverseTrimAlleles(result);
// Re-annotate and fix/remove some of the original annotations. // Re-annotate and fix/remove some of the original annotations.
// Note that the order of these actions matters and is different for polymorphic and monomorphic sites. // Note that the order of these actions matters and is different for polymorphic and monomorphic sites.
// For polymorphic sites we need to make sure e.g. the SB tag is sent to the annotation engine and then removed later. // For polymorphic sites we need to make sure e.g. the SB tag is sent to the annotation engine and then removed later.
@ -337,9 +351,33 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
} else { } else {
return null; return null;
} }
result = removeInfoAnnotationsIfNoAltAllele(result);
return result; return result;
} }
/**
* Remove INFO field annotations if no alternate alleles
*
* @param vc the variant context
* @return variant context with the INFO field annotations removed if no alternate alleles
*/
private VariantContext removeInfoAnnotationsIfNoAltAllele(final VariantContext vc) {
// If no alt alleles, remove any RankSumTest or RMSAnnotation attribute
if ( vc.getAlternateAlleles().isEmpty() ) {
final VariantContextBuilder builder = new VariantContextBuilder(vc);
for ( final String annotation : infoFieldAnnotationKeyNamesToRemove ) {
builder.rmAttribute(annotation);
}
return builder.make();
} else {
return vc;
}
}
/** /**
* Remove NON-REF alleles from the variant context * Remove NON-REF alleles from the variant context
* *
@ -357,16 +395,16 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
} }
} }
final VariantContextBuilder builder = new VariantContextBuilder(vc).alleles(newAlleles); // If no alt allele, then remove INFO fields that require alt alleles
// No alt allele, so remove INFO fields that require alt alleles
if ( newAlleles.size() == 1 ) { if ( newAlleles.size() == 1 ) {
final VariantContextBuilder builder = new VariantContextBuilder(vc).alleles(newAlleles);
for ( final String name : infoHeaderAltAllelesLineNames ) { for ( final String name : infoHeaderAltAllelesLineNames ) {
builder.rmAttributes(Arrays.asList(name)); builder.rmAttributes(Arrays.asList(name));
} }
return builder.make();
} else {
return vc;
} }
return builder.make();
} }
/** /**
@ -420,13 +458,14 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
* 4. change the PGT value from "0|1" to "1|1" for homozygous variant genotypes * 4. change the PGT value from "0|1" to "1|1" for homozygous variant genotypes
* 5. move GQ to RGQ if the site is monomorphic * 5. move GQ to RGQ if the site is monomorphic
* *
* @param VC the VariantContext with the Genotypes to fix * @param vc the VariantContext with the Genotypes to fix
* @param createRefGTs if true we will also create proper hom ref genotypes since we assume the site is monomorphic * @param createRefGTs if true we will also create proper hom ref genotypes since we assume the site is monomorphic
* @return a new set of Genotypes * @return a new set of Genotypes
*/ */
private List<Genotype> cleanupGenotypeAnnotations(final VariantContext VC, final boolean createRefGTs) { private List<Genotype> cleanupGenotypeAnnotations(final VariantContext vc, final boolean createRefGTs) {
final GenotypesContext oldGTs = VC.getGenotypes(); final GenotypesContext oldGTs = vc.getGenotypes();
final List<Genotype> recoveredGs = new ArrayList<>(oldGTs.size()); final List<Genotype> recoveredGs = new ArrayList<>(oldGTs.size());
for ( final Genotype oldGT : oldGTs ) { for ( final Genotype oldGT : oldGTs ) {
final Map<String, Object> attrs = new HashMap<>(oldGT.getExtendedAttributes()); final Map<String, Object> attrs = new HashMap<>(oldGT.getExtendedAttributes());
@ -455,15 +494,15 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
} }
// create AD if it's not there // create AD if it's not there
if ( !oldGT.hasAD() && VC.isVariant() ) { if ( !oldGT.hasAD() && vc.isVariant() ) {
final int[] AD = new int[VC.getNAlleles()]; final int[] AD = new int[vc.getNAlleles()];
AD[0] = depth; AD[0] = depth;
builder.AD(AD); builder.AD(AD);
} }
if ( createRefGTs ) { if ( createRefGTs ) {
final int ploidy = oldGT.getPloidy(); final int ploidy = oldGT.getPloidy();
final List<Allele> refAlleles = Collections.nCopies(ploidy,VC.getReference()); final List<Allele> refAlleles = Collections.nCopies(ploidy,vc.getReference());
//keep 0 depth samples and 0 GQ samples as no-call //keep 0 depth samples and 0 GQ samples as no-call
if (depth > 0 && oldGT.hasGQ() && oldGT.getGQ() > 0) { if (depth > 0 && oldGT.hasGQ() && oldGT.getGQ() > 0) {

View File

@ -251,7 +251,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
final WalkerTestSpec spec = new WalkerTestSpec( final WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-allSites"), baseBPResolutionString("-allSites"),
1, 1,
Collections.singletonList("77924e6b958a30f954e1c3a9f504a6a7")); Collections.singletonList("764ac46e0b985db187d85655240f7ec0"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testAllSitesNonBiallelic", spec); executeTest("testAllSitesNonBiallelic", spec);
} }
@ -685,9 +685,19 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
@Test @Test
public void testNewQualNaNBugFix() { public void testNewQualNaNBugFix() {
final WalkerTestSpec spec = new WalkerTestSpec( final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -newQual -V " + privateTestDir + "input-newqual-nan-bug-fix.vcf", b37KGReferenceWithDecoy), baseTestString(" -newQual -V " + privateTestDir + "input-newqual-nan-bug-fix.vcf", b37KGReferenceWithDecoy),
Collections.singletonList("503f4193c22fbcc451bd1c425b8b6bf8")); Collections.singletonList("503f4193c22fbcc451bd1c425b8b6bf8"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testNewQualNaNBugFix", spec); executeTest("testNewQualNaNBugFix", spec);
} }
@Test
public void testHomRefHighMQ() {
final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "NA18503.22.vcf -V " + privateTestDir + "NA18504.22.vcf -V " +
privateTestDir + "NA18505.22.vcf -allSites", b37KGReference),
Collections.singletonList("6d253024246e1024b9b6e8f885f53799"));
spec.disableShadowBCF();
executeTest("testHomRefHighMQ", spec);
}
} }

View File

@ -62,6 +62,15 @@ public class VariantAnnotatorEngine {
// Map of info field name to info field // Map of info field name to info field
private final Map<String, VCFInfoHeaderLine> hInfoMap = new HashMap<>(); private final Map<String, VCFInfoHeaderLine> hInfoMap = new HashMap<>();
/**
* Get the requested INFO field annotations
*
* @return requested INFO field annotations
*/
public List<InfoFieldAnnotation> getRequestedInfoAnnotations() {
return requestedInfoAnnotations;
}
protected static class VAExpression { protected static class VAExpression {
public String fullName, fieldName; public String fullName, fieldName;