diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTableIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTableIntegrationTest.java index ed937c881..8d5fadbb5 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -63,7 +63,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest { " --variant:vcf " + privateTestDir + "soap_gatk_annotated.vcf" + " -T VariantsToTable" + " -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" + - " -L chr1 -o %s" + moreArgs; + " -L chr1 -o %s " + moreArgs; } private String variantsToTableMultiAllelicCmd(String moreArgs) { @@ -76,15 +76,15 @@ public class VariantsToTableIntegrationTest extends WalkerTest { @Test(enabled = true) public void testComplexVariantsToTable() { - WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"), + WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(""), Arrays.asList("e8f771995127b727fb433da91dd4ee98")); executeTest("testComplexVariantsToTable", spec); } @Test(enabled = true) public void testComplexVariantsToTableFail() { - WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(""), 1, UserException.class); - executeTest("testComplexVariantsToTable-FAIL", spec); + WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd("--errorIfMissingData"), 1, UserException.class); + executeTest("testComplexVariantsToTableFail", spec); } @Test(enabled = true) @@ -114,6 +114,32 @@ public class VariantsToTableIntegrationTest extends WalkerTest { executeTest("testGenotypeFields", spec); } + @Test(enabled = true) + public void 
testUnfilteredGenotypeField() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --variant " + privateTestDir + "vcfexample2.vcf" + + " -T VariantsToTable" + + " -GF RD -GF FT" + + " -o %s", + 1, + Arrays.asList("fec1e70f3c5762225535559feb5ffdee")); + executeTest("testUnfilteredGenotypeField", spec); + } + + @Test(enabled = true) + public void testUnfilteredGenotypeFieldFail() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --variant " + privateTestDir + "vcfexample2.vcf" + + " -T VariantsToTable" + + " -GF RD -GF FT --errorIfMissingData" + + " -o %s", + 1, + UserException.class); + executeTest("testUnfilteredGenotypeFieldFail", spec); + } + @Test public void testMultiallelicGenotypeFields() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java index 5a4b7e2b5..5ecbae794 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/VariantsToTable.java @@ -98,7 +98,7 @@ import java.util.*; * *

Caveats

* * @@ -173,18 +173,28 @@ public class VariantsToTable extends RodWalker { * fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument * will cause VariantsToTable to write out NA values for missing fields instead of throwing an error. */ - @Advanced - @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false) + @Hidden + @Deprecated + @Argument(fullName="allowMissingData", shortName="AMD", doc="This argument is no longer used in GATK versions 3.8 and newer. " + + "Please see the online documentation for --errorIfMissingData.", required=false) public boolean ALLOW_MISSING_DATA = false; + + /** + * By default, this tool will write out NA values indicating missing data when it encounters a field without a value in a record. + * If this flag is added to the command, the tool will instead exit with an error if missing data is encountered. + */ + @Advanced + @Argument(fullName="errorIfMissingData", shortName="EMD", doc="If provided, we will require every record to contain every field", required=false) + public boolean ERROR_IF_MISSING_DATA = false; private final static String MISSING_DATA = "NA"; - private final List samples = new ArrayList(); + private final List samples = new ArrayList<>(); public void initialize() { if ( !genotypeFieldsToTake.isEmpty() ) { Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants); - TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); + TreeSet vcfSamples = new TreeSet<>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); samples.addAll(vcfSamples); // optimization: if there are no samples, we don't have to worry about any genotype fields @@ -210,7 +220,7 @@ public class VariantsToTable extends RodWalker { for ( VariantContext vc : tracker.getValues(variants, 
context.getLocation())) { if ( showFiltered || vc.isNotFiltered() ) { nRecords++; - for ( final List record : extractFields(vc, fieldsToTake, genotypeFieldsToTake, samples, ALLOW_MISSING_DATA, splitMultiAllelic) ) { + for ( final List record : extractFields(vc, fieldsToTake, genotypeFieldsToTake, samples, !ERROR_IF_MISSING_DATA, splitMultiAllelic) ) { if ( moltenizeOutput ) emitMoltenizedOutput(record); else @@ -281,7 +291,7 @@ public class VariantsToTable extends RodWalker { final boolean splitMultiAllelic) { final int numRecordsToProduce = splitMultiAllelic ? vc.getAlternateAlleles().size() : 1; - final List> records = new ArrayList>(numRecordsToProduce); + final List> records = new ArrayList<>(numRecordsToProduce); int numFields = fields.size(); final boolean addGenotypeFields = genotypeFields != null && !genotypeFields.isEmpty(); @@ -289,7 +299,7 @@ public class VariantsToTable extends RodWalker { numFields += genotypeFields.size() * samples.size(); for ( int i = 0; i < numRecordsToProduce; i++ ) - records.add(new ArrayList(numFields)); + records.add(new ArrayList<>(numFields)); for ( String field : fields ) { @@ -302,7 +312,7 @@ public class VariantsToTable extends RodWalker { } else if ( vc.hasAttribute(field) ) { addFieldValue(vc.getAttribute(field, null), records); } else if ( isWildCard(field) ) { - Set wildVals = new HashSet(); + Set wildVals = new HashSet<>(); for ( Map.Entry elt : vc.getAttributes().entrySet()) { if ( elt.getKey().startsWith(field.substring(0, field.length() - 1)) ) { wildVals.add(elt.getValue().toString()); @@ -310,17 +320,15 @@ public class VariantsToTable extends RodWalker { } String val = MISSING_DATA; - if ( wildVals.size() > 0 ) { - List toVal = new ArrayList(wildVals); + if ( !wildVals.isEmpty() ) { + List toVal = new ArrayList<>(wildVals); Collections.sort(toVal); val = Utils.join(",", toVal); } addFieldValue(val, records); - } else if ( ! 
allowMissingData ) { - throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); } else { - addFieldValue(MISSING_DATA, records); + handleMissingData(allowMissingData, field, records, vc); } } @@ -330,11 +338,22 @@ public class VariantsToTable extends RodWalker { if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAnyAttribute(gf) ) { if ( gf.equals(VCFConstants.GENOTYPE_KEY) ) addFieldValue(vc.getGenotype(sample).getGenotypeString(true), records); - else - addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records); + else { + /** + * TODO - If gf == "FT" and the GT record is not filtered, Genotype.getAnyAttribute == null. Genotype.hasAnyAttribute should be changed so it + * returns false for this condition. Presently, it always returns true. Once this is fixed, then only the "addFieldValue" statement will + * remain in the following logic block. + */ + if (vc.getGenotype(sample).getAnyAttribute(gf) != null) { + addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records); + } else { + handleMissingData(allowMissingData, gf, records, vc); + } + } + } + else { + handleMissingData(allowMissingData, gf, records, vc); } - else - addFieldValue(MISSING_DATA, records); } } } @@ -342,9 +361,17 @@ public class VariantsToTable extends RodWalker { return records; } + private static void handleMissingData(final boolean allowMissingData, final String field, final List> records, final VariantContext vc) { + if (allowMissingData) { + addFieldValue(MISSING_DATA, records); + } else { + throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); + } + } + private static void addFieldValue(final Object val, final List> result) { final int numResultRecords = result.size(); - + // if we're trying to create a single output record, add it if ( numResultRecords == 1 ) { result.get(0).add(prettyPrintObject(val)); @@ -364,6 +391,10 @@ public class VariantsToTable extends RodWalker { } 
private static String prettyPrintObject(final Object val) { + // should never occur + if (val == null) + return ""; + if ( val instanceof List ) return prettyPrintObject(((List)val).toArray()); @@ -400,7 +431,7 @@ public class VariantsToTable extends RodWalker { // ---------------------------------------------------------------------------------------------------- public static abstract class Getter { public abstract String get(VariantContext vc); } - public static final Map getters = new HashMap(); + public static final Map getters = new HashMap<>(); static { // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT