Using --keepOriginalAC in SelectVariants was causing it to emit bad VCFs

* This occurred when one or more alleles were lost from the record after selection
  * Discussed here: http://gatkforums.broadinstitute.org/discussion/comment/4718#Comment_4718
  * Added some integration tests for --keepOriginalAC (there were none before)
This commit is contained in:
Eric Banks 2013-04-04 12:02:22 -05:00
parent 7897d52f32
commit 6253ba164e
2 changed files with 40 additions and 6 deletions

View File

@ -242,6 +242,32 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testRemoveMLE--" + testFile, spec); executeTest("testRemoveMLE--" + testFile, spec);
} }
@Test
public void testKeepOriginalAC() {
String testFile = privateTestDir + "vcfexample.loseAlleleInSelection.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants --keepOriginalAC -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("ad7e8b25e431a3229a78cec063876559")
);
executeTest("testKeepOriginalAC--" + testFile, spec);
}
@Test
public void testKeepOriginalACAndENV() {
String testFile = privateTestDir + "vcfexample.loseAlleleInSelection.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants --keepOriginalAC -env -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("e9b8292212545684cdb163423329ee7e")
);
executeTest("testKeepOriginalACAndENV--" + testFile, spec);
}
@Test @Test
public void testMultipleRecordsAtOnePosition() { public void testMultipleRecordsAtOnePosition() {
String testFile = privateTestDir + "selectVariants.onePosition.vcf"; String testFile = privateTestDir + "selectVariants.onePosition.vcf";

View File

@ -406,8 +406,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
headerLines.add(new VCFHeaderLine("source", "SelectVariants")); headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
if (KEEP_ORIGINAL_CHR_COUNTS) { if (KEEP_ORIGINAL_CHR_COUNTS) {
headerLines.add(new VCFInfoHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC")); headerLines.add(new VCFInfoHeaderLine("AC_Orig", VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC"));
headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); headerLines.add(new VCFInfoHeaderLine("AF_Orig", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF"));
headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
} }
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions)); headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
@ -670,7 +670,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
GenotypesContext newGC = sub.getGenotypes(); GenotypesContext newGC = sub.getGenotypes();
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate) // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
if ( vc.getAlleles().size() != sub.getAlleles().size() ) final boolean lostAllelesInSelection = vc.getAlleles().size() != sub.getAlleles().size();
if ( lostAllelesInSelection )
newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes()); newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
@ -697,15 +698,22 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
builder.genotypes(newGC); builder.genotypes(newGC);
addAnnotations(builder, sub); addAnnotations(builder, sub, lostAllelesInSelection);
return builder.make(); return builder.make();
} }
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC) { /*
* Add annotations to the new VC
*
* @param builder the new VC to annotate
* @param originalVC the original -- but post-selection -- VC
* @param lostAllelesInSelection true if the original (pre-selection) VC has more alleles than the new one
*/
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final boolean lostAllelesInSelection) {
if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data
if (KEEP_ORIGINAL_CHR_COUNTS) { if ( KEEP_ORIGINAL_CHR_COUNTS && !lostAllelesInSelection ) {
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
builder.attribute("AC_Orig", originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); builder.attribute("AC_Orig", originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )