Merge pull request #1470 from broadinstitute/rhl_rm_nonref_genotypegvcfs

Remove NON_REF from allSites VCF output
This commit is contained in:
Ron Levine 2016-09-16 16:31:13 -04:00 committed by GitHub
commit e0cf6d1985
2 changed files with 59 additions and 4 deletions

View File

@ -157,7 +157,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
@Argument(fullName="uniquifySamples", shortName="uniquifySamples", doc="Assume duplicate samples are present and uniquify all names with '.variant' and file number index")
public boolean uniquifySamples = false;
@ArgumentCollection
@ArgumentCollection
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
/**
@ -194,6 +194,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
/**
 * Supplies the resource ROD bindings of this walker; this implementation has none to offer.
 *
 * @return an empty list, always
 */
public List<RodBinding<VariantContext>> getResourceRodBindings() {
    return Collections.<RodBinding<VariantContext>>emptyList();
}
/**
 * Always answers {@code false}.
 * NOTE(review): judging by the name, this tells the caller not to force-append dbSNP IDs --
 * confirm against the type that declares this method.
 *
 * @return {@code false}, unconditionally
 */
public boolean alwaysAppendDbsnpId() {
    return false;
}
// IDs of the INFO header lines whose count type is VCFHeaderLineCount.A, i.e. the INFO fields that require alt alleles.
// Filled in initialize(), and only when INCLUDE_NON_VARIANTS is set; read by removeNonRefAlleles() to drop
// those INFO attributes from a site that is left with a single allele once NON-REF has been removed.
final Set<String> infoHeaderAltAllelesLineNames = new LinkedHashSet<>();
public void initialize() {
boolean inputsAreTagged = false;
@ -248,6 +250,18 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags
if ( INCLUDE_NON_VARIANTS ) {
// Save INFO header names that require alt alleles
for ( final VCFHeaderLine headerLine : headerLines ) {
if (headerLine instanceof VCFInfoHeaderLine ) {
if (((VCFInfoHeaderLine) headerLine).getCountType() == VCFHeaderLineCount.A) {
infoHeaderAltAllelesLineNames.add(((VCFInfoHeaderLine) headerLine).getID());
}
}
}
}
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
@ -319,12 +333,42 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
} else if (INCLUDE_NON_VARIANTS) {
result = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, true)).make();
result = annotationEngine.annotateContext(tracker, ref, null, result);
result = removeNonRefAlleles(result);
} else {
return null;
}
return result;
}
/**
 * Strips the symbolic NON-REF allele from a variant context.
 * <p>
 * The result carries every allele of {@code vc} except NON-REF, in their original order. If that
 * leaves exactly one allele, the INFO attributes listed in {@code infoHeaderAltAllelesLineNames}
 * (the ones that require alt alleles) are removed as well.
 * <p>
 * NOTE(review): only the allele list and the INFO attributes are modified here; genotypes are
 * carried over by the builder as-is -- confirm that this is intended.
 *
 * @param vc the variant context
 * @return a variant context built from {@code vc} with the NON-REF allele removed
 */
private VariantContext removeNonRefAlleles(final VariantContext vc) {
    // Walk the alleles in order, skipping NON-REF and keeping everything else
    final List<Allele> keptAlleles = new ArrayList<>();
    for ( final Allele candidate : vc.getAlleles() ) {
        if ( candidate.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) ) {
            continue;
        }
        keptAlleles.add(candidate);
    }

    final VariantContextBuilder trimmed = new VariantContextBuilder(vc).alleles(keptAlleles);

    // A single remaining allele means no alt allele is left, so the INFO fields
    // that require alt alleles no longer apply and are dropped in one pass
    final boolean noAltAlleleLeft = keptAlleles.size() == 1;
    if ( noAltAlleleLeft ) {
        final List<String> altOnlyInfoKeys = new ArrayList<>(infoHeaderAltAllelesLineNames);
        trimmed.rmAttributes(altOnlyInfoKeys);
    }

    return trimmed.make();
}
/**
* Determines whether the provided VariantContext has real alternate alleles.
*

View File

@ -157,7 +157,8 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
1,
Collections.singletonList("edf083b3bf9cdec31b997a70fd56a7b2"));
Collections.singletonList("ea11554de21ef8f25e9983db8b5a8480"));
spec.disableShadowBCF();
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
}
@ -245,6 +246,16 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
executeTest("testNDA", spec);
}
/**
 * Executes the base-pair-resolution command line with {@code -allSites} and checks the result
 * against the expected MD5. The {@code 1} passed to the spec is presumably the number of
 * output files -- confirm against WalkerTestSpec. Shadow BCF is disabled for this spec.
 */
@Test
public void testAllSitesNonBiallelic() {
    final String commandLine = baseBPResolutionString("-allSites");
    final WalkerTestSpec allSitesSpec = new WalkerTestSpec(
            commandLine,
            1,
            Collections.singletonList("77924e6b958a30f954e1c3a9f504a6a7"));
    allSitesSpec.disableShadowBCF();
    executeTest("testAllSitesNonBiallelic", allSitesSpec);
}
@Test
public void testMaxAltAlleles() {
final WalkerTestSpec spec = new WalkerTestSpec(
@ -579,7 +590,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
baseTestString(" -V " + privateTestDir + "set.zero.RGQs.no.call.sample1.g.vcf" +
" -V " + privateTestDir + "set.zero.RGQs.no.call.sample2.g.vcf" +
" -L chr16:1279274-1279874 -allSites", hg19ReferenceWithChrPrefixInChromosomeNames),
Collections.singletonList("fc7016c0cd5cfa186bab80329eb0bc13"));
Collections.singletonList("e88db6e49c12487c55de42769d2f8c6c"));
spec.disableShadowBCF();
executeTest("testSetZeroRGQsToNoCall", spec);
}
@ -657,7 +668,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
public void testGenotypingSpanningDeletionWithAllSites() {
// Builds a test spec that feeds spanningDel.genotyping.g.vcf to the base test command with -allSites,
// using b37KGReference, then executes it with shadow BCF disabled.
final WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "spanningDel.genotyping.g.vcf -allSites", b37KGReference),
// NOTE(review): two expected-MD5 arguments appear back to back with no separator. This looks like a
// diff view with its +/- markers stripped; presumably the first value is the one the commit removed
// and the second the one it added -- confirm against the raw diff before relying on either.
Collections.singletonList("e2370ba728cc9b73950b2ed616ef669f"));
Collections.singletonList("d3d862faf954f9bb8b1619c3e889ad8c"));
spec.disableShadowBCF();
executeTest("testGenotypingSpanningDeletionWithAllSites", spec);
}