Now writes results to a minimal VCF with annotations included in the INFO field. Must be run with -NO_HEADER to remove the header entirely for the most bare-bones VCF; otherwise the output also includes command-line metadata.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5649 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
corin 2011-04-15 20:14:02 +00:00
parent fe26954ac6
commit 59215dab48
1 changed files with 42 additions and 46 deletions

View File

@ -31,15 +31,20 @@ import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.bed.BedParser;
@ -49,6 +54,7 @@ import javax.activation.*;
import java.beans.VetoableChangeSupport;
import java.io.PrintStream;
import java.io.Writer;
import java.security.KeyStore;
import java.util.*;
@ -59,75 +65,66 @@ import java.util.*;
@Requires(value={},referenceMetaData=@RMD(name="validated", type=VariantContext.class))
public class DeclareValidityWalker extends RodWalker<Integer, Integer>{
@Output(doc = "Write bed to this file instead of standard out")
public PrintStream out;
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter vcfWriter = null;
@Argument(fullName = "validity", shortName = "V",
doc = "Rank of variant validity on a 0-4 scale where 0 is definitely false positive; 4 is definitely true positive.")
int validity;
@Argument(fullName = "Note", shortName = "N", doc = "Annotation to be included in FP/TP field", required = false)
String Note =".";
String note =".";
@Argument(fullName = "Source", shortName = "s", doc = "Institutional source of annotation", required = false)
String Source = ".";
String source = ".";
@Argument(fullName = "Build", shortName = "bld", doc = "Genome build", required = false)
String build = ".";
/**
 * Writes the output VCF header and supplies the initial reduce value.
 *
 * Of the header lines returned by VCFUtils.getHeaderFields, only those whose
 * key is "fileformat" are carried over into the header that gets written.
 *
 * @return 0, the starting value for the reduce step
 */
@Override
public Integer reduceInit() {
    final Iterator<VCFHeaderLine> inherited = VCFUtils.getHeaderFields(getToolkit()).iterator();
    final Set<VCFHeaderLine> minimalHeader = new HashSet<VCFHeaderLine>();
    while (inherited.hasNext()) {
        final VCFHeaderLine line = inherited.next();
        if (line.getKey().equals("fileformat")) {
            minimalHeader.add(line);
        }
    }
    vcfWriter.writeHeader(new VCFHeader(minimalHeader));
    return 0;
}
public class ValidityDeclaration{
protected String contig;
protected long start;
protected long stop;
protected int valid;
protected Allele refBase;
protected Allele altBase;
protected String Note;
protected String Source;
protected String Build;
protected String user;
public String getBuild(){
/**
 * Assembles the validity annotations as a key/value map.
 *
 * The map holds the keys "validity", "user", "build", "note" and "Source".
 * "user" is read from the USER environment variable. When Build is ".",
 * the build name is taken from getBuild() instead.
 *
 * @param Validity validity rank to record
 * @param Note     free-text note to record
 * @param Source   source of the annotation
 * @param Build    genome build, or "." to have it derived via getBuild()
 * @return a new map containing the five annotations
 */
public Map<String, Object> addValidation(int Validity, String Note, String Source, String Build){
    final Map<String, Object> annotations = new HashMap<String, Object>();
    annotations.put("validity", Validity);
    annotations.put("user", System.getenv("USER"));
    // "." is the placeholder meaning "no build given"; fall back to the derived one.
    final String resolvedBuild = Build.equals(".") ? getBuild() : Build;
    annotations.put("build", resolvedBuild);
    annotations.put("note", Note);
    annotations.put("Source", Source);
    return annotations;
}
/**
 * Derives a genome build name from the path of the reference file.
 *
 * The path is searched for the substrings "19", "18", "36" and "37", in that
 * order, and the first one found decides the result. The match is a plain
 * substring test against the whole path.
 *
 * @return "hg19", "hg18", "b36" or "b37" for the first matching marker,
 *         or "unknown" when none of them occurs in the path
 */
public String getBuild(){
    final String referencePath = getToolkit().getArguments().referenceFile.getPath();
    // Ordered pairs of {marker, build name}; earlier rows take precedence.
    final String[][] markerToBuild = {
        {"19", "hg19"},
        {"18", "hg18"},
        {"36", "b36"},
        {"37", "b37"}
    };
    for (final String[] entry : markerToBuild) {
        if (referencePath.contains(entry[0])) {
            return entry[1];
        }
    }
    return "unknown";
}
/**
 * Creates a declaration for a single variant.
 *
 * The constructor expects a 1-based location: start is stored as
 * Loc.getStart() - 1 and stop as Loc.getStop() unchanged. Only the first
 * alternate allele of the variant is recorded; a warning is logged when the
 * variant carries more than one. The user is read from the USER environment
 * variable.
 *
 * @param Loc      location of the variant (1-based)
 * @param Con      variant supplying the reference and alternate alleles
 * @param validity validity rank to record
 * @param Note     free-text note to record
 * @param Source   source of the annotation
 * @param Build    genome build, or "." to have it derived via getBuild()
 */
public ValidityDeclaration(GenomeLoc Loc, VariantContext Con, int validity, String Note, String Source, String Build){ //Constructor expects 1 based
    this.contig = Loc.getContig();
    this.start = Loc.getStart() - 1;
    this.stop = Loc.getStop();
    this.valid = validity;
    this.altBase = Con.getAlternateAllele(0);
    if (Con.getAlternateAlleles().size() > 1) {
        logger.warn("***NOTE: Only the first alternate allele in a VCF will be declared as " +valid+"***");
    }
    this.refBase = Con.getReference();
    this.Note = Note;
    this.Source = Source;
    this.user = System.getenv("USER");
    // Compare by value: a "." that did not come from an interned literal would
    // fail a reference comparison and the build would never be derived.
    if (".".equals(Build)) {
        this.Build = getBuild();
    } else {
        this.Build = Build;
    }
}
/**
 * Formats this declaration as one tab-separated line.
 *
 * Column order: contig, start, stop, reference allele, alternate allele,
 * validity, user, build, note, source.
 *
 * @return the tab-separated representation of this declaration
 */
@Override
public String toString() {
    // Use this instance's own validity value (valid), not the enclosing walker's field.
    return String.format("%s\t%d\t%d\t%s\t%s\t%d\t%s\t%s\t%s\t%s", contig, start, stop, refBase, altBase, valid, user, Build, Note, Source);
}
}
/**
*
@ -145,9 +142,8 @@ public class DeclareValidityWalker extends RodWalker<Integer, Integer>{
if (current == null) {
return 0;}
ValidityDeclaration bedLine = new ValidityDeclaration(ref.getLocus(), current, validity, Note, Source, build);
out.println(bedLine);
VariantContext declared = VariantContext.modifyAttributes( current, addValidation(validity, note, source, build));
vcfWriter.add(declared, ref.getBase());
return 1;
}