Several improvements to GenotypeGVCFs: --includeNonVariantSites now actually works and we propagate AD to hom ref samples

This commit is contained in:
Eric Banks 2014-03-20 00:01:50 -04:00
parent 824983af1d
commit 7c8ce3cd6a
2 changed files with 47 additions and 21 deletions

View File

@ -121,8 +121,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
@Output(doc="File to which variants should be written")
protected VariantContextWriter vcfWriter = null;
// TODO -- currently this option doesn't actually work; must fix
@Argument(fullName="includeNonVariants", shortName="inv", doc="Include loci found to be non-variant after the combining procedure", required=false)
@Argument(fullName="includeNonVariantSites", shortName="allSites", doc="Include loci found to be non-variant after genotyping", required=false)
public boolean INCLUDE_NON_VARIANTS = false;
/**
@ -216,41 +215,68 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
result = new VariantContextBuilder(regenotypedVC).attributes(attrs).make();
}
// if it turned monomorphic and we don't want such sites, quit
if ( !INCLUDE_NON_VARIANTS && result.isMonomorphicInSamples() )
return null;
// if it turned monomorphic then we either need to ignore or fix such sites
boolean createRefGTs = false;
if ( result.isMonomorphicInSamples() ) {
if ( !INCLUDE_NON_VARIANTS )
return null;
createRefGTs = true;
}
// re-annotate it
result = annotationEngine.annotateContext(tracker, ref, null, result);
// fix some of the annotations
return new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result.getGenotypes())).make();
return new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, createRefGTs)).make();
}
/**
* Cleans up genotype-level annotations that need to be updated.
* 1. move MIN_DP to DP if present
* 2. remove SB is present
* 2. propagate DP to AD if not present
* 3. remove SB if present
*
* @param newGs the new Genotypes to fix
* @param VC the VariantContext with the Genotypes to fix
* @param createRefGTs if true we will also create proper hom ref genotypes since we assume the site is monomorphic
* @return a new set of Genotypes
*/
private List<Genotype> cleanupGenotypeAnnotations(final GenotypesContext newGs) {
final List<Genotype> recoveredGs = new ArrayList<>(newGs.size());
for ( final Genotype newG : newGs ) {
final Map<String, Object> attrs = new HashMap<>(newG.getExtendedAttributes());
private List<Genotype> cleanupGenotypeAnnotations(final VariantContext VC, final boolean createRefGTs) {
final GenotypesContext oldGTs = VC.getGenotypes();
final List<Genotype> recoveredGs = new ArrayList<>(oldGTs.size());
for ( final Genotype oldGT : oldGTs ) {
final Map<String, Object> attrs = new HashMap<>(oldGT.getExtendedAttributes());
final GenotypeBuilder builder = new GenotypeBuilder(newG);
final GenotypeBuilder builder = new GenotypeBuilder(oldGT);
int depth = oldGT.hasDP() ? oldGT.getDP() : 0;
// move the MIN_DP to DP
if ( newG.hasExtendedAttribute("MIN_DP") ) {
builder.DP(newG.getAttributeAsInt("MIN_DP", 0));
if ( oldGT.hasExtendedAttribute("MIN_DP") ) {
depth = oldGT.getAttributeAsInt("MIN_DP", 0);
builder.DP(depth);
attrs.remove("MIN_DP");
}
// remove SB
attrs.remove("SB");
// create AD if it's not there
if ( !oldGT.hasAD() && VC.isVariant() ) {
final int[] AD = new int[VC.getNAlleles()];
AD[0] = depth;
builder.AD(AD);
}
if ( createRefGTs ) {
final int ploidy = oldGT.getPloidy();
final List<Allele> refAlleles = new ArrayList<>(ploidy);
for ( int i = 0; i < ploidy; i++ )
refAlleles.add(VC.getReference());
builder.alleles(refAlleles);
// also, the PLs are technically no longer usable
builder.noPL();
}
recoveredGs.add(builder.noAttributes().attributes(attrs).make());
}
return recoveredGs;

View File

@ -65,19 +65,19 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-20,000,000", b37KGReference),
1,
Arrays.asList("2be5f6f7e7f79841108906555d548683"));
Arrays.asList("af58920a965229526aa6961f0d30b035"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@Test(enabled = false) // TODO -- reenable when this option works
@Test(enabled = true)
public void combineSingleSamplePipelineGVCF_includeNonVariants() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -inv -L 20:10,000,000-10,010,000", b37KGReference),
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
1,
Arrays.asList("de957075796512cb9f333f77515e97d5"));
Arrays.asList("764c8d62b4128496471f49cda029b5da"));
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
}
@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
1,
Arrays.asList("e3c7452277898fece54bf60af9588666"));
Arrays.asList("67120b6784e91e737c37a1d0b3d53bc4"));
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
}
@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V " + privateTestDir + "gvcfExample1.vcf" +
" -V " + privateTestDir + "gvcfExample2.vcf",
1,
Arrays.asList("67410d8ac490e3c9d19ba7a4cceaf8fb"));
Arrays.asList("24401bb1011bf6cd452e428c6ad5852d"));
executeTest("testSamplesWithDifferentLs", spec);
}
}