A java reimplementation of vcf2table in python; supports getting more useful information about genotypes (HET, e.g.) than was possible in python.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4130 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1e193e4c20
commit
f384d4a5d6
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Combines VCF records from different sources; supports both full merges and set unions.
|
||||
* Merge: combines multiple records into a single one; if sample names overlap then they are uniquified.
|
||||
* Union: assumes each rod represents the same set of samples (although this is not enforced); using the
|
||||
* priority list (if provided), emits a single record instance at every position represented in the rods.
|
||||
*/
|
||||
//@Reference(window=@Window(start=-50,stop=50))
|
||||
@Requires(value={})
|
||||
public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||
@Output(doc="File to which variants should be written",required=true)
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
|
||||
public String FIELDS;
|
||||
|
||||
@Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
|
||||
public int MAX_RECORDS = -1;
|
||||
int nRecords = 0;
|
||||
|
||||
private List<String> fieldsToTake;
|
||||
|
||||
public void initialize() {
|
||||
fieldsToTake = Arrays.asList(FIELDS.toUpperCase().split(","));
|
||||
|
||||
out.println(Utils.join("\t", fieldsToTake));
|
||||
}
|
||||
|
||||
private static abstract class Getter { public abstract String get(VariantContext vc); }
|
||||
private static Map<String, Getter> getters = new HashMap<String, Getter>();
|
||||
|
||||
static {
|
||||
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
|
||||
getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
|
||||
getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
|
||||
getters.put("REF", new Getter() { public String get(VariantContext vc) { return vc.getReference().toString(); } });
|
||||
getters.put("ALT", new Getter() {
|
||||
public String get(VariantContext vc) {
|
||||
StringBuilder x = new StringBuilder();
|
||||
int n = vc.getAlternateAlleles().size();
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
if ( i != 0 ) x.append(",");
|
||||
x.append(vc.getAlternateAllele(i).toString());
|
||||
}
|
||||
return x.toString();
|
||||
}
|
||||
});
|
||||
getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } });
|
||||
getters.put("FILTER", new Getter() { public String get(VariantContext vc) { return Utils.join(",", vc.getFilters()); } });
|
||||
|
||||
getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } });
|
||||
getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } });
|
||||
getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } });
|
||||
getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } });
|
||||
getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } });
|
||||
getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
|
||||
getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } });
|
||||
}
|
||||
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||
for ( VariantContext vc : vcs) {
|
||||
List<String> vals = new ArrayList<String>();
|
||||
|
||||
for ( String field : fieldsToTake ) {
|
||||
String val = "UNK";
|
||||
|
||||
if ( getters.containsKey(field) ) {
|
||||
val = getters.get(field).get(vc);
|
||||
} else if ( vc.hasAttribute(field) ) {
|
||||
val = vc.getAttributeAsString(field);
|
||||
}
|
||||
|
||||
vals.add(val);
|
||||
}
|
||||
|
||||
out.println(Utils.join("\t", vals));
|
||||
}
|
||||
|
||||
return 1;
|
||||
} else {
|
||||
if ( nRecords >= MAX_RECORDS ) {
|
||||
logger.warn("Calling sys exit to leave after " + nRecords + " records");
|
||||
System.exit(0); // todo -- what's the recommend way to abort like this?
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer counter, Integer sum) {
|
||||
return counter + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {}
|
||||
}
|
||||
Loading…
Reference in New Issue