Merge pull request #1204 from broadinstitute/ms_fix_SelectVariants

Fix for out-of-date VCF version output from SelectVariants
This commit is contained in:
ldgauthier 2015-11-05 08:07:48 -05:00
commit 448b575701
2 changed files with 14 additions and 3 deletions

View File

@ -612,6 +612,8 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
/**
* Test excluding variants with IDs
* Also tests the --forceValidOutput flag, which changes the GQ from floats to ints to match
* the header spec.
*/
@Test
public void testExcludeSelectionID() {
@ -619,9 +621,9 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String idFile = privateTestDir + "complexExample1.vcf.id";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -xlIDs " + idFile + " --variant " + testFile),
baseTestString(" -xlIDs " + idFile + " --variant " + testFile + " --forceValidOutput"),
1,
Arrays.asList("6c1e8591c134519bfc202b4ec7ef1f71")
Arrays.asList("45ad235b42bac75aa269e12bcd88a411")
);
spec.disableShadowBCF();
executeTest("testExcludeSelectionID--" + testFile, spec);

View File

@ -574,6 +574,15 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
@Argument(fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", required=false, doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.")
private boolean allowNonOverlappingCommandLineSamples = false;
/**
* If this argument is provided, the output will be compliant with the version in the header; however, it will also
* cause the tool to run slower than without the argument. Without the argument, the header will be compliant with
* the up-to-date version, but the output in the body may not be compliant. If an up-to-date input file is used,
* then the output will also be up-to-date regardless of this argument.
*/
@Argument(fullName="forceValidOutput", required=false, doc="Forces output VCF to be compliant to up-to-date version")
private boolean forceValidOutput = false;
public enum NumberAlleleRestriction {
ALL,
BIALLELIC,
@ -1008,7 +1017,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
*/
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
//subContextFromSamples() always decodes the vc, which is a fairly expensive operation. Avoid if possible
if ( noSamplesSpecified && !removeUnusedAlternates )
if ( noSamplesSpecified && !removeUnusedAlternates && !forceValidOutput )
return vc;
// strip out the alternate alleles that aren't being used