From 78deb3f19595816d08fbde35db5f62189727048e Mon Sep 17 00:00:00 2001
From: Mark DePristo
Date: Wed, 17 Aug 2011 11:57:00 -0400
Subject: [PATCH] Fixed bad character in documentation
---
.../walkers/variantutils/VariantsToTable.java | 226 ++++++++++++------
1 file changed, 153 insertions(+), 73 deletions(-)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
index af3593ce4..51515b2d3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@@ -40,29 +40,97 @@ import java.io.PrintStream;
import java.util.*;
/**
- * Emits specific fields as dictated by the user from one or more VCF files.
+ * Emits specific fields from a VCF file to a tab-delimited format
+ *
+ *
+ * This walker accepts a single VCF file and writes out user-selected fields from the
+ * VCF as a header-containing, tab-delimited file. The user specifies one or more
+ * fields to print with -F NAME, each of which appears as a single column in
+ * the output file, with a header named NAME, and the value of this field in the VCF
+ * one per line. NAME can be any standard VCF column (CHROM, ID, QUAL) or any binding
+ * in the INFO field (AC=10). Note that this tool does not support capturing any
+ * GENOTYPE field values. If a VCF record is missing a value, then the tool by
+ * default throws an error, but the special value NA can be emitted instead with
+ * appropriate tool arguments.
+ *
+ *
+ *
+ * Input
+ *
+ *
+ * - A VCF file
+ * - A list of -F fields to write
+ *
+ *
+ *
+ * Output
+ *
+ * A tab-delimited file containing the values of the requested fields in the VCF file
+ *
+ *
+ * Examples
+ *
+ * -T $WalkerName \
+ * -V file.vcf \
+ * -F CHROM -F POS -F ID -F QUAL -F AC \
+ * -o results.table
+ *
+ * would produce a file that looks like:
+ *
+ * CHROM POS ID QUAL AC
+ * 1 10 . 50 1
+ * 1 20 rs10 99 10
+ * et cetera...
+ *
+ *
+ * @author Mark DePristo
+ * @since 2010
*/
public class VariantsToTable extends RodWalker {
-
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;
- @Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
- public ArrayList fieldsToTake = new ArrayList();
+ /**
+ * -F NAME can be any standard VCF column (CHROM, ID, QUAL) or any binding in the INFO field (e.g., AC=10).
+ * Note that this tool does not support capturing any GENOTYPE field values. Note this argument
+ * accepts any number of inputs. So -F CHROM -F POS is allowed.
+ */
+ @Argument(fullName="fields", shortName="F", doc="The name of each field to capture for output in the table", required=true)
+ public List fieldsToTake = new ArrayList();
- @Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records")
+ /**
+ * By default this tool only emits values for records whose FILTER field is either PASS or . (unfiltered).
+ * Throwing this flag will cause $WalkerName to emit values regardless of the FILTER field value.
+ */
+ @Argument(fullName="showFiltered", shortName="raw", doc="If provided, field values from filtered records will be included in the output", required=false)
public boolean showFiltered = false;
- @Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
+ /**
+ * If provided, then this tool will exit with success after this number of records have been emitted to the file.
+ */
+ @Argument(fullName="maxRecords", shortName="M", doc="If provided, we will emit at most maxRecord records to the table", required=false)
public int MAX_RECORDS = -1;
int nRecords = 0;
+ /**
+ * By default, only biallelic (REF=A, ALT=B) sites are included in the output. If this flag is provided, then
+ * VariantsToTable will emit field values for records with multiple ALT alleles. Note that in general this
+ * can make your resulting file unreadable and malformed according to tools like R, as the representation of
+ * multi-allelic INFO field values can be lists of values.
+ */
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
public boolean keepMultiAllelic = false;
+ /**
+ * By default, this tool throws a UserException when it encounters a field without a value in some record. This
+ * is generally useful when you mistype -F CHRMO, so that you get a friendly warning about CHRMO not being
+ * found before the tool runs through 40M 1000G records. However, in some cases you genuinely want to allow such
+ * fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument
+ * will cause VariantsToTable to write out NA values for missing fields instead of throwing an error.
+ */
@Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false)
public boolean ALLOW_MISSING_DATA = false;
@@ -70,65 +138,6 @@ public class VariantsToTable extends RodWalker {
out.println(Utils.join("\t", fieldsToTake));
}
- public static abstract class Getter { public abstract String get(VariantContext vc); }
- public static Map getters = new HashMap();
-
- static {
- // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
- getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
- getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
- getters.put("REF", new Getter() {
- public String get(VariantContext vc) {
- String x = "";
- if ( vc.hasReferenceBaseForIndel() ) {
- Byte refByte = vc.getReferenceBaseForIndel();
- x=x+new String(new byte[]{refByte});
- }
- return x+vc.getReference().getDisplayString();
- }
- });
- getters.put("ALT", new Getter() {
- public String get(VariantContext vc) {
- StringBuilder x = new StringBuilder();
- int n = vc.getAlternateAlleles().size();
- if ( n == 0 ) return ".";
- if ( vc.hasReferenceBaseForIndel() ) {
- Byte refByte = vc.getReferenceBaseForIndel();
- x.append(new String(new byte[]{refByte}));
- }
-
- for ( int i = 0; i < n; i++ ) {
- if ( i != 0 ) x.append(",");
- x.append(vc.getAlternateAllele(i).getDisplayString());
- }
- return x.toString();
- }
- });
- getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } });
- getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) {
- if ( vc.isSNP() && vc.isBiallelic() )
- return VariantContextUtils.isTransition(vc) ? "1" : "0";
- else
- return "-1";
- }});
- getters.put("FILTER", new Getter() { public String get(VariantContext vc) {
- return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); }
- });
-
- getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } });
- getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } });
- getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } });
- getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } });
- getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } });
- getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
- getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } });
- getters.put("GQ", new Getter() { public String get(VariantContext vc) {
- if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF");
- return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError());
- }});
- }
-
-
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) // RodWalkers can make funky map calls
return 0;
@@ -155,6 +164,15 @@ public class VariantsToTable extends RodWalker {
return s.endsWith("*");
}
+ /**
+ * Utility function that returns the list of values for each field in fields from vc.
+ *
+ * @param vc the VariantContext whose field values we want to capture
+ * @param fields a non-null list of fields to capture from VC
+ * @param allowMissingData if false, then throws a UserException if any field isn't found in vc. Otherwise
+ * provides a value of NA
+ * @return the list of values extracted from vc, one for each field in fields
+ */
public static List extractFields(VariantContext vc, List fields, boolean allowMissingData) {
List vals = new ArrayList();
@@ -213,13 +231,75 @@ public class VariantsToTable extends RodWalker {
return vals;
}
- public Integer reduceInit() {
- return 0;
- }
-
- public Integer reduce(Integer counter, Integer sum) {
- return counter + sum;
- }
-
+ //
+ // default reduce -- adds each counter to a running sum that starts at 0; onTraversalDone discards the total
+ //
+ public Integer reduceInit() { return 0; }
+ public Integer reduce(Integer counter, Integer sum) { return counter + sum; }
+ public void onTraversalDone(Integer sum) {}
+
+ // ----------------------------------------------------------------------------------------------------
+ //
+ // static system for getting values from VC by name.
+ //
+ // ----------------------------------------------------------------------------------------------------
+
+ public static abstract class Getter { public abstract String get(VariantContext vc); } // computes one string value from a VariantContext
+ public static Map getters = new HashMap(); // field name -> Getter, filled by the static block below
+
+ static {
+ // VCF header columns, for reference: #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT -- getters below cover CHROM, POS, REF, ALT, QUAL, FILTER plus derived fields
+ getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
+ getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
+ getters.put("REF", new Getter() {
+ public String get(VariantContext vc) {
+ String x = "";
+ if ( vc.hasReferenceBaseForIndel() ) { // prepend the indel's reference base when the context carries one
+ Byte refByte = vc.getReferenceBaseForIndel();
+ x=x+new String(new byte[]{refByte}); // NOTE(review): String(byte[]) decodes with the platform default charset
+ }
+ return x+vc.getReference().getDisplayString();
+ }
+ });
+ getters.put("ALT", new Getter() {
+ public String get(VariantContext vc) {
+ StringBuilder x = new StringBuilder();
+ int n = vc.getAlternateAlleles().size();
+ if ( n == 0 ) return "."; // no alternate alleles
+ if ( vc.hasReferenceBaseForIndel() ) { // NOTE(review): base is prepended once, before the first allele only -- confirm this is intended for multi-allelic sites
+ Byte refByte = vc.getReferenceBaseForIndel();
+ x.append(new String(new byte[]{refByte}));
+ }
+
+ for ( int i = 0; i < n; i++ ) { // comma-join the display strings of all alternate alleles
+ if ( i != 0 ) x.append(",");
+ x.append(vc.getAlternateAllele(i).getDisplayString());
+ }
+ return x.toString();
+ }
+ });
+ getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } });
+ getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) {
+ if ( vc.isSNP() && vc.isBiallelic() )
+ return VariantContextUtils.isTransition(vc) ? "1" : "0";
+ else
+ return "-1"; // "-1" marks records that are not biallelic SNPs
+ }});
+ getters.put("FILTER", new Getter() { public String get(VariantContext vc) { // "PASS" when not filtered, otherwise comma-joined vc.getFilters()
+ return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); }
+ });
+
+ getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } });
+ getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } });
+ getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } });
+ getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } });
+ getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } }); // het count + hom-var count
+ getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
+ getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } }); // sample count minus no-call count
+ getters.put("GQ", new Getter() { public String get(VariantContext vc) {
+ if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF"); // NOTE(review): with zero samples getGenotype(0) below is still reached -- confirm
+ return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError()); // 10 * (-log10 error) of the first genotype, two decimals
+ }});
+ }
+
}