Merge pull request #491 from broadinstitute/eb_get_AD_back_from_gvcfs
Fixed up some of the genotype-level annotations being propogated in the ...
This commit is contained in:
commit
8c922be684
|
|
@ -68,8 +68,7 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
|||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
|
|
@ -124,6 +123,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
@Output(doc="File to which variants should be written")
|
||||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
// TODO -- currently this option doesn't actually work; must fix
|
||||
@Argument(fullName="includeNonVariants", shortName="inv", doc="Include loci found to be non-variant after the combining procedure", required=false)
|
||||
public boolean INCLUDE_NON_VARIANTS = false;
|
||||
|
||||
|
|
@ -197,21 +197,20 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
*
|
||||
* @param tracker the ref tracker
|
||||
* @param ref the ref context
|
||||
* @param combinedVC the combined genomic VC
|
||||
* @param originalVC the combined genomic VC
|
||||
* @return a new VariantContext or null if the site turned monomorphic and we don't want such sites
|
||||
*/
|
||||
protected VariantContext regenotypeVC(final RefMetaDataTracker tracker, final ReferenceContext ref, final VariantContext combinedVC) {
|
||||
if ( combinedVC == null ) throw new IllegalArgumentException("combinedVC cannot be null");
|
||||
protected VariantContext regenotypeVC(final RefMetaDataTracker tracker, final ReferenceContext ref, final VariantContext originalVC) {
|
||||
if ( originalVC == null ) throw new IllegalArgumentException("originalVC cannot be null");
|
||||
|
||||
VariantContext result = combinedVC;
|
||||
final Map<String,Object> originalAttributes = combinedVC.getAttributes();
|
||||
VariantContext result = originalVC;
|
||||
|
||||
// only re-genotype polymorphic sites
|
||||
if ( combinedVC.isVariant() ) {
|
||||
if ( result.isVariant() ) {
|
||||
final VariantContext regenotypedVC = genotypingEngine.calculateGenotypes(result);
|
||||
if ( regenotypedVC == null )
|
||||
return null;
|
||||
result = new VariantContextBuilder(regenotypedVC).attributes(originalAttributes).make();
|
||||
result = new VariantContextBuilder(regenotypedVC).attributes(originalVC.getAttributes()).make();
|
||||
}
|
||||
|
||||
// if it turned monomorphic and we don't want such sites, quit
|
||||
|
|
@ -219,7 +218,43 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
return null;
|
||||
|
||||
// re-annotate it
|
||||
return annotationEngine.annotateContext(tracker, ref, null, result);
|
||||
result = annotationEngine.annotateContext(tracker, ref, null, result);
|
||||
|
||||
// fix some of the annotations
|
||||
return new VariantContextBuilder(result).genotypes(fixGenotypeAnnotations(result.getGenotypes(), originalVC.getGenotypes())).make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fixes genotype-level annotations that need to be updated.
|
||||
* 1. recover the AD values from oldGs into newGs (the genotyping process strips out AD values for various reasons)
|
||||
* 2. move MIN_DP to DP if present
|
||||
* 3. remove SB is present
|
||||
*
|
||||
* @param newGs the new Genotypes to fix
|
||||
* @param originalGs the original Genotypes to pull annotations from
|
||||
* @return a new set of Genotypes
|
||||
*/
|
||||
private List<Genotype> fixGenotypeAnnotations(final GenotypesContext newGs, final GenotypesContext originalGs) {
|
||||
final List<Genotype> recoveredGs = new ArrayList<>(newGs.size());
|
||||
for ( final Genotype newG : newGs ) {
|
||||
final Genotype originalG = originalGs.get(newG.getSampleName());
|
||||
final Map<String, Object> attrs = new HashMap<>(newG.getExtendedAttributes());
|
||||
|
||||
// recover the AD
|
||||
final GenotypeBuilder builder = new GenotypeBuilder(newG).AD(originalG.getAD());
|
||||
|
||||
// move the MIN_DP to DP
|
||||
if ( newG.hasExtendedAttribute("MIN_DP") ) {
|
||||
builder.DP(newG.getAttributeAsInt("MIN_DP", 0));
|
||||
attrs.remove("MIN_DP");
|
||||
}
|
||||
|
||||
// remove SB
|
||||
attrs.remove("SB");
|
||||
|
||||
recoveredGs.add(builder.noAttributes().attributes(attrs).make());
|
||||
}
|
||||
return recoveredGs;
|
||||
}
|
||||
|
||||
public VariantContextWriter reduceInit() {
|
||||
|
|
|
|||
|
|
@ -156,7 +156,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMD5s() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("ad4916ff9ab1479845558ddaaae131a6"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c36bbd50c9596b2fa7a7fc3952ae9690"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMD5s", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ import java.util.Arrays;
|
|||
public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||
|
||||
private static String baseTestString(String args, String ref) {
|
||||
return "-T GenotypeGVCFs --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + ref + args;
|
||||
return "-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + ref + args;
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -65,11 +65,11 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("10670f6f04d3d662aa38c20ac74af35c"));
|
||||
Arrays.asList("8fd26c30509b98372c2945405a1d7cc4"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test(enabled = false) // TODO -- reenable when this option works
|
||||
public void combineSingleSamplePipelineGVCF_includeNonVariants() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
|
||||
|
|
@ -78,7 +78,39 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -inv -L 20:10,000,000-10,010,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("de957075796512cb9f333f77515e97d5"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void combineSingleSamplePipelineGVCF_addDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
|
||||
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
|
||||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("d0eb9046c24fa6a66ee20feff35457d4"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testJustOneSample() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf",
|
||||
1,
|
||||
Arrays.asList("dd0e2846b3be9692ecb94f152b45c316"));
|
||||
executeTest("testJustOneSample", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSamplesWithDifferentLs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||
1,
|
||||
Arrays.asList("a4f76a094af4708fc7f96a9b7a1b8726"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -1521,8 +1521,9 @@ public class GATKVariantContextUtils {
|
|||
// we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
|
||||
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
|
||||
final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
|
||||
final int[] AD = g.hasAD() ? generateAD(g.getAD(), indexesOfRelevantAlleles) : null;
|
||||
|
||||
final Genotype newG = new GenotypeBuilder(g).name(name).alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(PLs).noAD().noGQ().make();
|
||||
final Genotype newG = new GenotypeBuilder(g).name(name).alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(PLs).AD(AD).noGQ().make();
|
||||
mergedGenotypes.add(newG);
|
||||
}
|
||||
}
|
||||
|
|
@ -1592,6 +1593,33 @@ public class GATKVariantContextUtils {
|
|||
return newPLs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current
|
||||
* alleles from the original AD.
|
||||
*
|
||||
* @param originalAD the original AD to extend
|
||||
* @param indexesOfRelevantAlleles the indexes of the original alleles corresponding to the new alleles
|
||||
* @return non-null array of new AD values
|
||||
*/
|
||||
private static int[] generateAD(final int[] originalAD, final int[] indexesOfRelevantAlleles) {
|
||||
|
||||
final int numADs = indexesOfRelevantAlleles.length;
|
||||
if ( numADs == originalAD.length )
|
||||
return originalAD;
|
||||
|
||||
final int[] newAD = new int[numADs];
|
||||
|
||||
for ( int i = 0; i < numADs; i++ ) {
|
||||
final int newIndex = indexesOfRelevantAlleles[i];
|
||||
if ( newIndex >= originalAD.length )
|
||||
newAD[i] = 0;
|
||||
else
|
||||
newAD[newIndex] = originalAD[i];
|
||||
}
|
||||
|
||||
return newAD;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is just a safe wrapper around GenotypeLikelihoods.calculatePLindex()
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue