Remove RankSumTest and RMSAnnotation from hom-ref sites

This commit is contained in:
Ron Levine 2016-10-07 12:10:28 -04:00
parent 2ee0755c35
commit 01a858542f
3 changed files with 72 additions and 14 deletions

View File

@ -64,9 +64,12 @@ import org.broadinstitute.gatk.engine.walkers.Reference;
import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.engine.walkers.TreeReducible;
import org.broadinstitute.gatk.engine.walkers.Window;
import org.broadinstitute.gatk.tools.walkers.annotator.RankSumTest;
import org.broadinstitute.gatk.tools.walkers.annotator.RMSAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AS_StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedArgumentCollection;
@ -188,6 +191,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
private UnifiedGenotypingEngine genotypingEngine;
// the annotation engine
private VariantAnnotatorEngine annotationEngine;
// the INFO field annotation key names to remove
private final List<String> infoFieldAnnotationKeyNamesToRemove = new ArrayList<>();
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
@ -223,6 +228,16 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
annotationEngine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, Collections.<String>emptyList(), this, toolkit);
// Request INFO field annotations inheriting from RankSumTest and RMSAnnotation added to remove list
for ( final InfoFieldAnnotation annotation : annotationEngine.getRequestedInfoAnnotations() ) {
if ( annotation instanceof RankSumTest || annotation instanceof RMSAnnotation ) {
final List<String> keyNames = annotation.getKeyNames();
if ( !keyNames.isEmpty() ) {
infoFieldAnnotationKeyNamesToRemove.add(keyNames.get(0));
}
}
}
// create the genotyping engine
// when checking for presence of AS_StandardAnnotation we must deal with annoying feature that
// the class name with or without the trailing "Annotation" are both valid command lines
@ -321,7 +336,6 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
//do trimming after allele-specific annotation reduction or the mapping is difficult
result = GATKVariantContextUtils.reverseTrimAlleles(result);
// Re-annotate and fix/remove some of the original annotations.
// Note that the order of these actions matters and is different for polymorphic and monomorphic sites.
// For polymorphic sites we need to make sure e.g. the SB tag is sent to the annotation engine and then removed later.
@ -337,9 +351,33 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
} else {
return null;
}
result = removeInfoAnnotationsIfNoAltAllele(result);
return result;
}
/**
* Remove INFO field annotations if no alternate alleles
*
* @param vc the variant context
* @return variant context with the INFO field annotations removed if no alternate alleles
*/
private VariantContext removeInfoAnnotationsIfNoAltAllele(final VariantContext vc) {
// If no alt alleles, remove any RankSumTest or RMSAnnotation attribute
if ( vc.getAlternateAlleles().isEmpty() ) {
final VariantContextBuilder builder = new VariantContextBuilder(vc);
for ( final String annotation : infoFieldAnnotationKeyNamesToRemove ) {
builder.rmAttribute(annotation);
}
return builder.make();
} else {
return vc;
}
}
/**
* Remove NON-REF alleles from the variant context
*
@ -357,16 +395,16 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
}
}
final VariantContextBuilder builder = new VariantContextBuilder(vc).alleles(newAlleles);
// No alt allele, so remove INFO fields that require alt alleles
// If no alt allele, then remove INFO fields that require alt alleles
if ( newAlleles.size() == 1 ) {
final VariantContextBuilder builder = new VariantContextBuilder(vc).alleles(newAlleles);
for ( final String name : infoHeaderAltAllelesLineNames ) {
builder.rmAttributes(Arrays.asList(name));
}
return builder.make();
} else {
return vc;
}
return builder.make();
}
/**
@ -420,13 +458,14 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
* 4. change the PGT value from "0|1" to "1|1" for homozygous variant genotypes
* 5. move GQ to RGQ if the site is monomorphic
*
* @param VC the VariantContext with the Genotypes to fix
* @param vc the VariantContext with the Genotypes to fix
* @param createRefGTs if true we will also create proper hom ref genotypes since we assume the site is monomorphic
* @return a new set of Genotypes
*/
private List<Genotype> cleanupGenotypeAnnotations(final VariantContext VC, final boolean createRefGTs) {
final GenotypesContext oldGTs = VC.getGenotypes();
private List<Genotype> cleanupGenotypeAnnotations(final VariantContext vc, final boolean createRefGTs) {
final GenotypesContext oldGTs = vc.getGenotypes();
final List<Genotype> recoveredGs = new ArrayList<>(oldGTs.size());
for ( final Genotype oldGT : oldGTs ) {
final Map<String, Object> attrs = new HashMap<>(oldGT.getExtendedAttributes());
@ -455,15 +494,15 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
}
// create AD if it's not there
if ( !oldGT.hasAD() && VC.isVariant() ) {
final int[] AD = new int[VC.getNAlleles()];
if ( !oldGT.hasAD() && vc.isVariant() ) {
final int[] AD = new int[vc.getNAlleles()];
AD[0] = depth;
builder.AD(AD);
}
if ( createRefGTs ) {
final int ploidy = oldGT.getPloidy();
final List<Allele> refAlleles = Collections.nCopies(ploidy,VC.getReference());
final List<Allele> refAlleles = Collections.nCopies(ploidy,vc.getReference());
//keep 0 depth samples and 0 GQ samples as no-call
if (depth > 0 && oldGT.hasGQ() && oldGT.getGQ() > 0) {

View File

@ -251,7 +251,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
final WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-allSites"),
1,
Collections.singletonList("77924e6b958a30f954e1c3a9f504a6a7"));
Collections.singletonList("764ac46e0b985db187d85655240f7ec0"));
spec.disableShadowBCF();
executeTest("testAllSitesNonBiallelic", spec);
}
@ -685,9 +685,19 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
@Test
public void testNewQualNaNBugFix() {
final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -newQual -V " + privateTestDir + "input-newqual-nan-bug-fix.vcf", b37KGReferenceWithDecoy),
baseTestString(" -newQual -V " + privateTestDir + "input-newqual-nan-bug-fix.vcf", b37KGReferenceWithDecoy),
Collections.singletonList("503f4193c22fbcc451bd1c425b8b6bf8"));
spec.disableShadowBCF();
executeTest("testNewQualNaNBugFix", spec);
}
@Test
public void testHomRefHighMQ() {
final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "NA18503.22.vcf -V " + privateTestDir + "NA18504.22.vcf -V " +
privateTestDir + "NA18505.22.vcf -allSites", b37KGReference),
Collections.singletonList("6d253024246e1024b9b6e8f885f53799"));
spec.disableShadowBCF();
executeTest("testHomRefHighMQ", spec);
}
}

View File

@ -62,6 +62,15 @@ public class VariantAnnotatorEngine {
// Map of info field name to info field
private final Map<String, VCFInfoHeaderLine> hInfoMap = new HashMap<>();
/**
* Get the requested INFO field annotations
*
* @return requested INFO field annotations
*/
public List<InfoFieldAnnotation> getRequestedInfoAnnotations() {
return requestedInfoAnnotations;
}
protected static class VAExpression {
public String fullName, fieldName;