Merge pull request #1470 from broadinstitute/rhl_rm_nonref_genotypegvcfs
Remove NON_REF from allSites VCF output
This commit is contained in:
commit
e0cf6d1985
|
|
@ -157,7 +157,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
@Argument(fullName="uniquifySamples", shortName="uniquifySamples", doc="Assume duplicate samples are present and uniquify all names with '.variant' and file number index")
|
||||
public boolean uniquifySamples = false;
|
||||
|
||||
@ArgumentCollection
|
||||
@ArgumentCollection
|
||||
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
|
||||
|
||||
/**
|
||||
|
|
@ -194,6 +194,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
||||
public boolean alwaysAppendDbsnpId() { return false; }
|
||||
|
||||
// INFO Header names that require alt alleles
|
||||
final Set<String> infoHeaderAltAllelesLineNames = new LinkedHashSet<>();
|
||||
|
||||
public void initialize() {
|
||||
boolean inputsAreTagged = false;
|
||||
|
|
@ -248,6 +250,18 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags
|
||||
|
||||
if ( INCLUDE_NON_VARIANTS ) {
|
||||
// Save INFO header names that require alt alleles
|
||||
for ( final VCFHeaderLine headerLine : headerLines ) {
|
||||
if (headerLine instanceof VCFInfoHeaderLine ) {
|
||||
if (((VCFInfoHeaderLine) headerLine).getCountType() == VCFHeaderLineCount.A) {
|
||||
infoHeaderAltAllelesLineNames.add(((VCFInfoHeaderLine) headerLine).getID());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
|
||||
|
||||
|
|
@ -319,12 +333,42 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
} else if (INCLUDE_NON_VARIANTS) {
|
||||
result = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, true)).make();
|
||||
result = annotationEngine.annotateContext(tracker, ref, null, result);
|
||||
result = removeNonRefAlleles(result);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove NON-REF alleles from the variant context
|
||||
*
|
||||
* @param vc the variant context
|
||||
* @return variant context with the NON-REF alleles removed if multiallelic or replaced with NO-CALL alleles if biallelic
|
||||
*/
|
||||
private VariantContext removeNonRefAlleles(final VariantContext vc) {
|
||||
|
||||
// If NON_REF is the only alt allele, ignore this site
|
||||
final List<Allele> newAlleles = new ArrayList<>();
|
||||
// Only keep alleles that are not NON-REF
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
if ( !allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) ) {
|
||||
newAlleles.add(allele);
|
||||
}
|
||||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc).alleles(newAlleles);
|
||||
|
||||
// No alt allele, so remove INFO fields that require alt alleles
|
||||
if ( newAlleles.size() == 1 ) {
|
||||
for ( final String name : infoHeaderAltAllelesLineNames ) {
|
||||
builder.rmAttributes(Arrays.asList(name));
|
||||
}
|
||||
}
|
||||
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the provided VariantContext has real alternate alleles.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -157,7 +157,8 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
||||
1,
|
||||
Collections.singletonList("edf083b3bf9cdec31b997a70fd56a7b2"));
|
||||
Collections.singletonList("ea11554de21ef8f25e9983db8b5a8480"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||
}
|
||||
|
||||
|
|
@ -245,6 +246,16 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
executeTest("testNDA", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllSitesNonBiallelic() {
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-allSites"),
|
||||
1,
|
||||
Collections.singletonList("77924e6b958a30f954e1c3a9f504a6a7"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testAllSitesNonBiallelic", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxAltAlleles() {
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -579,7 +590,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
baseTestString(" -V " + privateTestDir + "set.zero.RGQs.no.call.sample1.g.vcf" +
|
||||
" -V " + privateTestDir + "set.zero.RGQs.no.call.sample2.g.vcf" +
|
||||
" -L chr16:1279274-1279874 -allSites", hg19ReferenceWithChrPrefixInChromosomeNames),
|
||||
Collections.singletonList("fc7016c0cd5cfa186bab80329eb0bc13"));
|
||||
Collections.singletonList("e88db6e49c12487c55de42769d2f8c6c"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testSetZeroRGQsToNoCall", spec);
|
||||
}
|
||||
|
|
@ -657,7 +668,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testGenotypingSpanningDeletionWithAllSites() {
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V " + privateTestDir + "spanningDel.genotyping.g.vcf -allSites", b37KGReference),
|
||||
Collections.singletonList("e2370ba728cc9b73950b2ed616ef669f"));
|
||||
Collections.singletonList("d3d862faf954f9bb8b1619c3e889ad8c"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testGenotypingSpanningDeletionWithAllSites", spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue