Merge pull request #1470 from broadinstitute/rhl_rm_nonref_genotypegvcfs
Remove NON_REF from allSites VCF output
This commit is contained in:
commit
e0cf6d1985
|
|
@ -157,7 +157,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
@Argument(fullName="uniquifySamples", shortName="uniquifySamples", doc="Assume duplicate samples are present and uniquify all names with '.variant' and file number index")
|
@Argument(fullName="uniquifySamples", shortName="uniquifySamples", doc="Assume duplicate samples are present and uniquify all names with '.variant' and file number index")
|
||||||
public boolean uniquifySamples = false;
|
public boolean uniquifySamples = false;
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
|
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -194,6 +194,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
||||||
public boolean alwaysAppendDbsnpId() { return false; }
|
public boolean alwaysAppendDbsnpId() { return false; }
|
||||||
|
|
||||||
|
// INFO Header names that require alt alleles
|
||||||
|
final Set<String> infoHeaderAltAllelesLineNames = new LinkedHashSet<>();
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
boolean inputsAreTagged = false;
|
boolean inputsAreTagged = false;
|
||||||
|
|
@ -248,6 +250,18 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
|
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
|
||||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags
|
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags
|
||||||
|
|
||||||
|
if ( INCLUDE_NON_VARIANTS ) {
|
||||||
|
// Save INFO header names that require alt alleles
|
||||||
|
for ( final VCFHeaderLine headerLine : headerLines ) {
|
||||||
|
if (headerLine instanceof VCFInfoHeaderLine ) {
|
||||||
|
if (((VCFInfoHeaderLine) headerLine).getCountType() == VCFHeaderLineCount.A) {
|
||||||
|
infoHeaderAltAllelesLineNames.add(((VCFInfoHeaderLine) headerLine).getID());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
||||||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
|
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
|
||||||
|
|
||||||
|
|
@ -319,12 +333,42 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
} else if (INCLUDE_NON_VARIANTS) {
|
} else if (INCLUDE_NON_VARIANTS) {
|
||||||
result = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, true)).make();
|
result = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, true)).make();
|
||||||
result = annotationEngine.annotateContext(tracker, ref, null, result);
|
result = annotationEngine.annotateContext(tracker, ref, null, result);
|
||||||
|
result = removeNonRefAlleles(result);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove NON-REF alleles from the variant context
|
||||||
|
*
|
||||||
|
* @param vc the variant context
|
||||||
|
* @return variant context with the NON-REF alleles removed if multiallelic or replaced with NO-CALL alleles if biallelic
|
||||||
|
*/
|
||||||
|
private VariantContext removeNonRefAlleles(final VariantContext vc) {
|
||||||
|
|
||||||
|
// If NON_REF is the only alt allele, ignore this site
|
||||||
|
final List<Allele> newAlleles = new ArrayList<>();
|
||||||
|
// Only keep alleles that are not NON-REF
|
||||||
|
for ( final Allele allele : vc.getAlleles() ) {
|
||||||
|
if ( !allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) ) {
|
||||||
|
newAlleles.add(allele);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder(vc).alleles(newAlleles);
|
||||||
|
|
||||||
|
// No alt allele, so remove INFO fields that require alt alleles
|
||||||
|
if ( newAlleles.size() == 1 ) {
|
||||||
|
for ( final String name : infoHeaderAltAllelesLineNames ) {
|
||||||
|
builder.rmAttributes(Arrays.asList(name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder.make();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines whether the provided VariantContext has real alternate alleles.
|
* Determines whether the provided VariantContext has real alternate alleles.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -157,7 +157,8 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||||
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
||||||
1,
|
1,
|
||||||
Collections.singletonList("edf083b3bf9cdec31b997a70fd56a7b2"));
|
Collections.singletonList("ea11554de21ef8f25e9983db8b5a8480"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -245,6 +246,16 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
executeTest("testNDA", spec);
|
executeTest("testNDA", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAllSitesNonBiallelic() {
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseBPResolutionString("-allSites"),
|
||||||
|
1,
|
||||||
|
Collections.singletonList("77924e6b958a30f954e1c3a9f504a6a7"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testAllSitesNonBiallelic", spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMaxAltAlleles() {
|
public void testMaxAltAlleles() {
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
|
@ -579,7 +590,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
baseTestString(" -V " + privateTestDir + "set.zero.RGQs.no.call.sample1.g.vcf" +
|
baseTestString(" -V " + privateTestDir + "set.zero.RGQs.no.call.sample1.g.vcf" +
|
||||||
" -V " + privateTestDir + "set.zero.RGQs.no.call.sample2.g.vcf" +
|
" -V " + privateTestDir + "set.zero.RGQs.no.call.sample2.g.vcf" +
|
||||||
" -L chr16:1279274-1279874 -allSites", hg19ReferenceWithChrPrefixInChromosomeNames),
|
" -L chr16:1279274-1279874 -allSites", hg19ReferenceWithChrPrefixInChromosomeNames),
|
||||||
Collections.singletonList("fc7016c0cd5cfa186bab80329eb0bc13"));
|
Collections.singletonList("e88db6e49c12487c55de42769d2f8c6c"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testSetZeroRGQsToNoCall", spec);
|
executeTest("testSetZeroRGQsToNoCall", spec);
|
||||||
}
|
}
|
||||||
|
|
@ -657,7 +668,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
public void testGenotypingSpanningDeletionWithAllSites() {
|
public void testGenotypingSpanningDeletionWithAllSites() {
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -V " + privateTestDir + "spanningDel.genotyping.g.vcf -allSites", b37KGReference),
|
baseTestString(" -V " + privateTestDir + "spanningDel.genotyping.g.vcf -allSites", b37KGReference),
|
||||||
Collections.singletonList("e2370ba728cc9b73950b2ed616ef669f"));
|
Collections.singletonList("d3d862faf954f9bb8b1619c3e889ad8c"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testGenotypingSpanningDeletionWithAllSites", spec);
|
executeTest("testGenotypingSpanningDeletionWithAllSites", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue