The GATK no longer writes vcf3.3; welcome to the world of vcf4.0. Needed to fix a few output bugs to get this to work, but it's looking great. Much more still to come. Guillermo: hopefully this doesn't break your local build too badly.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3786 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-07-14 04:56:58 +00:00
parent 530a320f28
commit 6b5c88d4d6
39 changed files with 274 additions and 751 deletions

View File

@ -62,9 +62,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
// line numerical values are allowed to be unbounded (or unknown), which is
// marked with a dot (.)
public static int UNBOUNDED = -1; // the value we store internally for unbounded types
public static String UNBOUNDED_ENCODING_VCF4 = "."; // the encoding for vcf 4
public static String UNBOUNDED_ENCODING_VCF3 = "-1"; // the encoding for vcf 3
public static int UNBOUNDED = -1; // the value we store internally for unbounded types
/**
* create a VCF format header line
@ -104,8 +102,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
name = mapping.get("ID");
count = version == VCFHeaderVersion.VCF4_0 ?
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
@ -117,19 +115,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
* make a string representation of this header line
* @return a string representation
*/
protected String makeStringRep() {
if (mVersion == VCFHeaderVersion.VCF3_3 || mVersion == VCFHeaderVersion.VCF3_2)
return String.format(lineType.toString()+"=%s,%d,%s,\"%s\"", name, count, type.toString(), description);
else if (mVersion == VCFHeaderVersion.VCF4_0) {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
// TODO: this next line should change when we have more than two used encoding schemes
map.put("Number", count == UNBOUNDED ? (mVersion == VCFHeaderVersion.VCF4_0 ? UNBOUNDED_ENCODING_VCF4 : UNBOUNDED_ENCODING_VCF3) : count);
map.put("Type", type);
map.put("Description", description);
return lineType.toString() + "=" + VCFHeaderLineTranslator.toValue(this.mVersion,map);
}
else throw new RuntimeException("Unsupported VCFVersion " + mVersion);
/**
 * Encode this compound header line (presumably INFO or FORMAT, per lineType —
 * TODO confirm) as a VCF4 structured value:
 * lineType=&lt;ID=...,Number=...,Type=...,Description=...&gt;.
 * An UNBOUNDED count is written with the VCF4 unbounded encoding (".").
 */
protected String toStringEncoding() {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
// LinkedHashMap preserves insertion order so the tags are emitted ID,Number,Type,Description
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
map.put("Type", type);
map.put("Description", description);
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
}
/**

View File

@ -71,6 +71,8 @@ public final class VCFConstants {
public static final String MISSING_GENOTYPE_QUALITY_v3 = "-1";
public static final String MISSING_HAPLOTYPE_QUALITY_v3 = "-1";
public static final String MISSING_DEPTH_v3 = "-1";
public static final String UNBOUNDED_ENCODING_v4 = ".";
public static final String UNBOUNDED_ENCODING_v3 = "-1";
public static final String EMPTY_ALLELE = ".";
public static final String EMPTY_GENOTYPE = "./.";
public static final double MAX_GENOTYPE_QUAL = 99.0;

View File

@ -40,14 +40,14 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
description = mapping.get("Description");
}
protected String makeStringRep() {
/**
 * Encode this FILTER header line for the active VCF version:
 * VCF3.x uses the plain FILTER=name,"description" form; VCF4.0 uses the
 * structured FILTER=&lt;ID=...,Description=...&gt; form.
 */
protected String toStringEncoding() {
if (mVersion == VCFHeaderVersion.VCF3_3 || mVersion == VCFHeaderVersion.VCF3_2)
return String.format("FILTER=%s,\"%s\"", name, description);
else if (mVersion == VCFHeaderVersion.VCF4_0) {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
map.put("Description", description);
// NOTE(review): two consecutive return statements — looks like a diff/merge
// artifact. The second (VCFHeaderLine.toStringEncoding) appears to be the
// intended replacement for the first; as written the second is unreachable
// and this will not compile. Verify and keep exactly one.
return "FILTER=" + VCFHeaderLineTranslator.toValue(this.mVersion,map);
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
}
else throw new RuntimeException("Unsupported VCFVersion " + mVersion);
}

View File

@ -1,5 +1,33 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broad.tribble.vcf;
import org.broadinstitute.sting.utils.StingException;
import java.util.Map;
/**
@ -80,11 +108,11 @@ public class VCFHeaderLine implements Comparable {
/**
 * String form of this header line, lazily computed and cached in stringRep.
 */
public String toString() {
if ( stringRep == null )
// NOTE(review): two consecutive assignments — diff/merge artifact. Only the
// first is guarded by the null check; the second always runs. The rename
// from makeStringRep() to toStringEncoding() elsewhere in this commit
// suggests the second line is the intended survivor — verify and keep one.
stringRep = makeStringRep();
stringRep = toStringEncoding();
return stringRep;
}
protected String makeStringRep() {
protected String toStringEncoding() {
return mKey + "=" + mValue;
}
@ -106,4 +134,29 @@ public class VCFHeaderLine implements Comparable {
if (!version.equals(this.mVersion)) this.stringRep = null;
this.mVersion = version;
}
/**
* create a string of a mapping pair for the target VCF version
* @param keyValues a mapping of the key->value pairs to output
* @return a string, correctly formatted
*/
/**
 * Render a key/value mapping as a VCF4 structured header value:
 * &lt;K1=V1,K2=V2,...&gt; in the map's iteration order. Values containing a
 * comma or a space, and any value under the "Description" key, are wrapped
 * in double quotes.
 *
 * @param keyValues the key->value pairs to serialize
 * @return the bracketed, comma-separated encoding
 * @throws StingException if any value in the map is null
 */
public static String toStringEncoding(Map<String, ? extends Object> keyValues) {
    StringBuilder out = new StringBuilder("<");
    String separator = "";
    for (Map.Entry<String, ?> pair : keyValues.entrySet()) {
        Object value = pair.getValue();
        if ( value == null ) throw new StingException("Header problem: unbound value at " + pair + " from " + keyValues);
        String text = value.toString();
        // Description values, and anything with an embedded comma/space, must be quoted
        boolean needsQuotes = text.contains(",") || text.contains(" ") || pair.getKey().equals("Description");
        out.append(separator).append(pair.getKey()).append("=");
        out.append(needsQuotes ? "\"" + text + "\"" : text);
        separator = ",";
    }
    return out.append(">").toString();
}
}

View File

@ -1,7 +1,5 @@
package org.broad.tribble.vcf;
import org.broadinstitute.sting.utils.StingException;
import java.util.*;
/**
@ -19,15 +17,10 @@ public class VCFHeaderLineTranslator {
/** Parse a header-line value string using the parser registered for the given VCF version. */
public static Map<String,String> parseLine(VCFHeaderVersion version, String valueLine, List<String> expectedTagOrder) {
return mapping.get(version).parseLine(valueLine,expectedTagOrder);
}
/** Serialize key->value pairs using the version-specific parser's encoding. */
public static String toValue(VCFHeaderVersion version, Map<String,Object> keyValues) {
return mapping.get(version).toValue(keyValues);
}
}
interface VCFLineParser {
public String toValue(Map<String,? extends Object> keyValues);
public Map<String,String> parseLine(String valueLine, List<String> expectedTagOrder);
}
@ -38,31 +31,6 @@ interface VCFLineParser {
class VCF4Parser implements VCFLineParser {
Set<String> bracketed = new HashSet<String>();
/**
* create a string of a mapping pair for the target VCF version
* @param keyValues a mapping of the key->value pairs to output
* @return a string, correctly formatted
*/
public String toValue(Map<String, ? extends Object> keyValues) {
StringBuilder builder = new StringBuilder();
builder.append("<");
boolean start = true;
for (Map.Entry<String,?> entry : keyValues.entrySet()) {
// comma-separate every pair after the first
if (start) start = false;
else builder.append(",");
if ( entry.getValue() == null ) throw new StingException("Header problem: unbound value at " + entry + " from " + keyValues);
builder.append(entry.getKey());
builder.append("=");
// quote values containing a comma or space, and all Description values
builder.append(entry.getValue().toString().contains(",") ||
entry.getValue().toString().contains(" ") ||
entry.getKey().equals("Description") ? "\""+ entry.getValue() + "\"" : entry.getValue());
}
builder.append(">");
return builder.toString();
}
/**
* parse a VCF4 line
* @param valueLine the line
@ -110,17 +78,6 @@ class VCF4Parser implements VCFLineParser {
class VCF3Parser implements VCFLineParser {
/**
 * Encode key->value pairs in VCF3 style: a bare comma-separated list of the
 * VALUES only (VCF3 tags are positional, so keys are not written), quoting
 * any value that contains a comma or a space.
 */
public String toValue(Map<String, ? extends Object> keyValues) {
StringBuilder builder = new StringBuilder();
boolean start = true;
for (Map.Entry<String,?> entry : keyValues.entrySet()) {
if (start) start = false;
else builder.append(",");
builder.append(entry.getValue().toString().contains(",") || entry.getValue().toString().contains(" ")? "\""+ entry.getValue() + "\"" : entry.getValue());
}
return builder.toString();
}
public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) {
// our return map
Map<String, String> ret = new LinkedHashMap<String, String>();
@ -128,9 +85,6 @@ class VCF3Parser implements VCFLineParser {
// a builder to store up characters as we go
StringBuilder builder = new StringBuilder();
// store the key when we're parsing out the values
String key = "";
// where are we in the stream of characters?
int index = 0;
// where in the expected tag order are we?

View File

@ -429,11 +429,44 @@ public class VariantContextUtils {
return uniqify ? sampleName + "." + trackName : sampleName;
}
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes);
/** Copy of vc with its genotypes replaced; every other field is carried over unchanged. */
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes());
}
/** Copy of vc with its location replaced; every other field is carried over unchanged. */
public static VariantContext modifyLocation(VariantContext vc, GenomeLoc loc) {
return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes());
}
/** Copy of vc with its filters replaced; every other field is carried over unchanged. */
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
}
/** Copy of vc with its attributes replaced; every other field is carried over unchanged. */
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes);
}
/** Copy of genotype g with its sample name replaced; every other field is carried over unchanged. */
public static Genotype modifyName(Genotype g, String name) {
return new Genotype(name, g.getAlleles(), g.getNegLog10PError(), g.getFilters(), g.getAttributes(), g.genotypesArePhased());
}
/** Copy of genotype g with its attributes replaced; every other field is carried over unchanged. */
public static Genotype modifyAttributes(Genotype g, Map<String, Object> attributes) {
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(), attributes, g.genotypesArePhased());
}
/**
 * Returns a copy of vc whose per-genotype attributes are restricted to the
 * given key set. A null allowedAttributes means "no restriction" and vc is
 * returned unchanged.
 */
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
if ( allowedAttributes == null )
return vc;
Map<String, Genotype> newGenotypes = new HashMap<String, Genotype>(vc.getNSamples());
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
// keep only the whitelisted attribute keys for this sample's genotype
Map<String, Object> attrs = new HashMap<String, Object>();
for ( Map.Entry<String, Object> attr : genotype.getValue().getAttributes().entrySet() ) {
if ( allowedAttributes.contains(attr.getKey()) )
attrs.put(attr.getKey(), attr.getValue());
}
newGenotypes.put(genotype.getKey(), VariantContextUtils.modifyAttributes(genotype.getValue(), attrs));
}
return VariantContextUtils.modifyGenotypes(vc, newGenotypes);
}
}

View File

@ -251,10 +251,10 @@ public class VariantContextAdaptors {
}
public static VCFRecord toVCF(VariantContext vc, byte vcfRefBase) {
return toVCF(vc, vcfRefBase, null, true, false);
}
List<String> allowedGenotypeAttributeKeys = null;
boolean filtersWereAppliedToContext = true;
boolean filtersWereAppliedToGenotypes = false;
public static VCFRecord toVCF(VariantContext vc, byte vcfRefBase, List<String> allowedGenotypeAttributeKeys, boolean filtersWereAppliedToContext, boolean filtersWereAppliedToGenotypes) {
// deal with the reference
String referenceBases = new String(vc.getReference().getBases());

View File

@ -30,11 +30,14 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import java.util.*;
@ -52,11 +55,11 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
private VCFWriter vcfwriter = null;
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
// Don't allow mixed types for now
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL);
private String[] ALLOWED_FORMAT_FIELDS = {VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_LIKELIHOODS_KEY };
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
return 0;
@ -66,19 +69,26 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
Collection<VariantContext> contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false);
for ( VariantContext vc : contexts ) {
VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false);
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
if ( dbsnp != null )
vcf.setID(dbsnp.getRsID());
attrs.put("ID", dbsnp.getRsID());
vc = VariantContextUtils.modifyAttributes(vc, attrs);
// set the appropriate sample name if necessary
if ( sampleName != null && vcf.hasGenotypeData() && vcf.getGenotype(INPUT_ROD_NAME) != null )
vcf.getGenotype(INPUT_ROD_NAME).setSampleName(sampleName);
writeRecord(vcf, tracker);
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(INPUT_ROD_NAME) ) {
Genotype g = VariantContextUtils.modifyName(vc.getGenotype(INPUT_ROD_NAME), sampleName);
Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
genotypes.put(sampleName, g);
vc = VariantContextUtils.modifyGenotypes(vc, genotypes);
}
writeRecord(vc, tracker, ref.getBase());
}
return 1;
}
private void writeRecord(VCFRecord rec, RefMetaDataTracker tracker) {
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, byte ref) {
if ( vcfwriter == null ) {
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
@ -86,28 +96,39 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
TreeSet<String> samples = new TreeSet<String>();
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
for ( VCFHeaderLine field : hInfo ) {
if ( field instanceof VCFFormatHeaderLine) {
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
}
}
Set<String> samples = new TreeSet<String>();
if ( sampleName != null ) {
samples.add(sampleName);
} else {
// try VCF first
samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME));
List<Object> rods = tracker.getReferenceMetaData(INPUT_ROD_NAME);
if ( rods.size() == 0 )
throw new IllegalStateException("VCF record was created, but no rod data is present");
if ( samples.isEmpty() ) {
List<Object> rods = tracker.getReferenceMetaData(INPUT_ROD_NAME);
if ( rods.size() == 0 )
throw new IllegalStateException("No rod data is present");
Object rod = rods.get(0);
if ( rod instanceof VCFRecord )
samples.addAll(Arrays.asList(((VCFRecord)rod).getSampleNames()));
else if ( rod instanceof HapMapROD )
samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs()));
else
samples.addAll(Arrays.asList(rec.getSampleNames()));
Object rod = rods.get(0);
if ( rod instanceof HapMapROD )
samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs()));
else
samples.addAll(vc.getSampleNames());
}
}
vcfwriter = new VCFWriter(out);
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
}
vcfwriter.addRecord(rec);
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vcfwriter.add(vc, new byte[]{ref});
}
public Integer reduceInit() {

View File

@ -40,7 +40,7 @@ import java.util.*;
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency"),
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };

View File

@ -27,13 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
@ -139,10 +137,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(engine.getVCFAnnotationDescriptions());
vcfWriter = new VCFWriter(out, true);
vcfWriter = new VCFWriter(out);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);
@ -185,13 +182,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
List<Object> rods = tracker.getReferenceMetaData("variant");
// ignore places where we don't have a variant
if ( rods.size() == 0 )
return 0;
Object variant = rods.get(0);
VariantContext vc = VariantContextAdaptors.toVariantContext("variant", variant, ref);
VariantContext vc = tracker.getVariantContext(ref, "variant", null, context.getLocation(), true);
if ( vc == null )
return 0;
@ -210,17 +201,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
}
if ( ! indelsOnly ) {
if ( variant instanceof VCFRecord ) {
for(VariantContext annotatedVC : annotatedVCs ) {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
}
}
for ( VariantContext annotatedVC : annotatedVCs )
vcfWriter.add(annotatedVC, new byte[]{ref.getBase()});
} else {
// check to see if the buffered context is different (in location) this context
if ( indelBufferContext != null && ! indelBufferContext.iterator().next().getLocation().equals(annotatedVCs.iterator().next().getLocation()) ) {
for(VariantContext annotatedVC : indelBufferContext ) {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
}
for ( VariantContext annotatedVC : indelBufferContext )
vcfWriter.add(annotatedVC, new byte[]{ref.getBase()});
indelBufferContext = annotatedVCs;
} else {
indelBufferContext = annotatedVCs;

View File

@ -110,7 +110,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
}
}
writer = new VCFWriter(out, true);
writer = new VCFWriter(out);
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(vc.getSampleNames())));
}

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
@ -47,7 +48,7 @@ import java.util.*;
*/
@Reference(window=@Window(start=0,stop=40))
@Requires(value={},referenceMetaData=@RMD(name="sequenom",type= ReferenceOrderedDatum.class))
public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
public class SequenomValidationConverter extends RodWalker<Pair<VariantContext, Byte>,Integer> {
@Argument(fullName="maxHardy", doc="Maximum phred-scaled Hardy-Weinberg violation pvalue to consider an assay valid [default:20]", required=false)
protected double maxHardy = 20.0;
@Argument(fullName="maxNoCall", doc="Maximum no-call rate (as a fraction) to consider an assay valid [default:0.05]", required=false)
@ -63,7 +64,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
private TreeSet<String> sampleNames = null;
// vcf records
private ArrayList<VCFRecord> records = new ArrayList<VCFRecord>();
private ArrayList<Pair<VariantContext, Byte>> records = new ArrayList<Pair<VariantContext, Byte>>();
// statistics
private int numRecords = 0;
@ -85,7 +86,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
return numberOfVariantsProcessed;
}
public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public Pair<VariantContext, Byte> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return null;
@ -105,7 +106,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
return addVariantInformationToCall(ref, vc, rod);
}
public Integer reduce(VCFRecord call, Integer numVariants) {
public Integer reduce(Pair<VariantContext, Byte> call, Integer numVariants) {
if ( call != null ) {
numVariants++;
records.add(call);
@ -156,16 +157,13 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
VCFHeader header = new VCFHeader(hInfo, sampleNames);
vcfWriter.writeHeader(header);
for ( VCFRecord record : records )
vcfWriter.addRecord(record);
for ( Pair<VariantContext, Byte> record : records )
vcfWriter.add(record.first, new byte[]{record.second});
vcfWriter.close();
}
private VCFRecord addVariantInformationToCall(ReferenceContext ref, VariantContext vContext, Object rod) {
VCFRecord record = VariantContextAdaptors.toVCF(vContext, ref.getBase());
record.setGenotypeFormatString("GT");
private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext, Object rod) {
// check possible filters
double hwPvalue = hardyWeinbergCalculation(vContext);
@ -176,23 +174,25 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
double homVarProp = (double)vContext.getHomVarCount() / (double)vContext.getNSamples();
boolean isViolation = false;
Set<String> filters = new HashSet<String>();
if ( noCallProp > maxNoCall ) {
record.setFilterString("HighNoCallRate");
filters.add("HighNoCallRate");
numNoCallViolations++;
isViolation = true;
} else if ( hwScore > maxHardy ) {
record.setFilterString("HardyWeinbergViolation");
filters.add("HardyWeinbergViolation");
numHWViolations++;
isViolation = true;
} else if ( homVarProp > maxHomNonref) {
record.setFilterString("TooManyHomVars");
filters.add("TooManyHomVars");
numHomVarViolations++;
isViolation = true;
}
vContext = VariantContextUtils.modifyFilters(vContext, filters);
numRecords++;
// add the info fields
HashMap<String, String> infoMap = new HashMap<String,String>(5);
HashMap<String, Object> infoMap = new HashMap<String, Object>();
infoMap.put("NoCallPct", String.format("%.1f", 100.0*noCallProp));
infoMap.put("HomRefPct", String.format("%.1f", 100.0*homRefProp));
infoMap.put("HomVarPct", String.format("%.1f", 100.0*homVarProp));
@ -204,13 +204,14 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
numTrueVariants++;
infoMap.put(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount));
infoMap.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount()));
record.addInfoFields(infoMap);
// set the id if it's a plink rod
if ( rod instanceof PlinkRod )
record.setID(((PlinkRod)rod).getVariantName());
infoMap.put("ID", ((PlinkRod)rod).getVariantName());
return record;
vContext = VariantContextUtils.modifyAttributes(vContext, infoMap);
return new Pair<VariantContext, Byte>(vContext, ref.getBase());
}
private double hardyWeinbergCalculation(VariantContext vc) {

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
@ -73,7 +72,6 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
// Private Member Variables
/////////////////////////////
private VCFWriter vcfWriter;
private final ArrayList<String> ALLOWED_FORMAT_FIELDS = new ArrayList<String>();
final ExpandingArrayList<Double> qCuts = new ExpandingArrayList<Double>();
final ExpandingArrayList<String> filterName = new ExpandingArrayList<String>();
@ -101,11 +99,6 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
throw new StingException("Can not find input file: " + TRANCHE_FILENAME);
}
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_KEY); // copied from VariantsToVCF
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_QUALITY_KEY);
ALLOWED_FORMAT_FIELDS.add(VCFConstants.DEPTH_KEY);
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
@ -145,29 +138,31 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
return 1;
}
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
for( VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
if( vc != null && !vc.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
final VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), ALLOWED_FORMAT_FIELDS, false, false);
String filterString = null;
if( !vc.isFiltered() ) {
final double qual = vc.getPhredScaledQual();
boolean setFilter = false;
for( int tranche = qCuts.size() - 1; tranche >= 0; tranche-- ) {
if( qual >= qCuts.get(tranche) ) {
if(tranche == qCuts.size() - 1) {
vcf.setFilterString(VCFConstants.PASSES_FILTERS_v3);
setFilter = true;
filterString = VCFConstants.PASSES_FILTERS_v4;
} else {
vcf.setFilterString(filterName.get(tranche));
setFilter = true;
filterString = filterName.get(tranche);
}
break;
}
}
if( !setFilter ) {
vcf.setFilterString(filterName.get(0)+"+");
if( filterString == null )
filterString = filterName.get(0)+"+";
if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
Set<String> filters = new HashSet<String>();
filters.add(filterString);
vc = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes());
}
}
vcfWriter.addRecord( vcf );
vcfWriter.add( vc, new byte[]{ref.getBase()} );
}
}
@ -189,7 +184,7 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
return 1;
}
public void onTraversalDone( ExpandingArrayList<VariantDatum> reduceSum ) {
public void onTraversalDone( Integer reduceSum ) {
vcfWriter.close();
}
}

View File

@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
@ -97,7 +96,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private VariantGaussianMixtureModel theModel = null;
private VCFWriter vcfWriter;
private Set<String> ignoreInputFilterSet = null;
private final ArrayList<String> ALLOWED_FORMAT_FIELDS = new ArrayList<String>();
//---------------------------------------------------------------------------------------------------------------
//
@ -123,11 +121,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
throw new StingException( "Variant Optimization Model is unrecognized. Implemented options are GAUSSIAN_MIXTURE_MODEL and K_NEAREST_NEIGHBORS" );
}
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_KEY); // copied from VariantsToVCF
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_QUALITY_KEY);
ALLOWED_FORMAT_FIELDS.add(VCFConstants.DEPTH_KEY);
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
final TreeSet<String> samples = new TreeSet<String>();
@ -180,7 +173,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
if( vc != null && !vc.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
final VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), ALLOWED_FORMAT_FIELDS, false, false);
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
final VariantDatum variantDatum = new VariantDatum();
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
@ -196,13 +188,16 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
variantDatum.qual = QUALITY_SCALE_FACTOR * QualityUtils.phredScaleErrorRate( Math.max(1.0 - pTrue, 0.000000001) ); // BUGBUG: don't have a normalizing constant, so need to scale up qual scores arbitrarily
mapList.add( variantDatum );
vcf.addInfoField("OQ", String.format("%.2f", ((Double)vc.getPhredScaledQual())));
vcf.setQual( variantDatum.qual );
vcf.setFilterString(VCFConstants.PASSES_FILTERS_v3);
vcfWriter.addRecord( vcf );
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
attrs.put("OQ", String.format("%.2f", ((Double)vc.getPhredScaledQual())));
Set<String> filters = new HashSet<String>();
filters.add(VCFConstants.PASSES_FILTERS_v4);
VariantContext newVC = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), variantDatum.qual / 10.0, filters, attrs);
vcfWriter.add( newVC, new byte[]{ref.getBase()} );
} else { // not a SNP or is filtered so just dump it out to the VCF file
vcfWriter.addRecord( vcf );
vcfWriter.add( vc, new byte[]{ref.getBase()} );
}
}

View File

@ -65,7 +65,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
private List<String> priority = null;
public void initialize() {
vcfWriter = new VCFWriter(out, true);
vcfWriter = new VCFWriter(out);
validateAnnotateUnionArguments();
Map<String, VCFHeader> vcfRods = SampleUtils.getVCFHeadersFromRods(getToolkit(), null);

View File

@ -50,7 +50,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
writer = new VCFWriter(out, true);
writer = new VCFWriter(out);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
writer.writeHeader(vcfHeader);
}

View File

@ -74,7 +74,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
writer = new VCFWriter(out, true);
writer = new VCFWriter(out);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
writer.writeHeader(vcfHeader);
}

View File

@ -77,7 +77,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
wroteHeader = true;
}
writer.addRecord(VariantContextAdaptors.toVCF(vc, ref.getBase()));
writer.add(vc, new byte[]{ref.getBase()});
}
n++;

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Argument;
@ -9,11 +8,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.File;
import java.io.FileInputStream;
@ -45,19 +47,6 @@ import java.util.*;
*/
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.util.Iterator;
/**
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
* of the given ROD.
@ -92,7 +81,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true);
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
VCFHeader header = null;
for( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();
@ -120,7 +109,8 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
}
}
header.setVersion(VCFHeaderVersion.VCF4_0);
if ( header != null )
header.setVersion(VCFHeaderVersion.VCF4_0);
vcfWriter.writeHeader(header);

View File

@ -1,86 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Jan 27, 2010
* Time: 5:48:36 PM
* To change this template use File | Settings | File Templates.
*/
/**
 * Immutable snapshot of how a single locus is covered by the truth set and the
 * variant call set, carrying the underlying VCF records and reference context.
 */
class LocusConcordanceInfo {

    /** Which of the two call sets contain this locus (and whether the variant call was filtered out). */
    public enum ConcordanceType {
        TRUTH_SET, TRUTH_SET_VARIANT_FILTERED, VARIANT_SET, BOTH_SETS
    }

    private final ConcordanceType type;
    private final VCFRecord variantRecord;
    private final VCFRecord truthRecord;
    private final ReferenceContext refContext;

    /**
     * @param type          which set(s) this locus appears in
     * @param truthRecord   truth-set record (may be null when type is VARIANT_SET)
     * @param variantRecord call-set record (may be null when type is a TRUTH_SET variant)
     * @param ref           reference context at the locus
     */
    public LocusConcordanceInfo(ConcordanceType type, VCFRecord truthRecord, VCFRecord variantRecord, ReferenceContext ref) {
        this.type = type;
        this.variantRecord = variantRecord;
        this.truthRecord = truthRecord;
        this.refContext = ref;
    }

    /** Genotype-level concordance can only be assessed when the locus is present in both sets. */
    public boolean concordanceIsCheckable() {
        return type == ConcordanceType.BOTH_SETS;
    }

    public VCFGenotypeRecord getTruthGenotype(String sample) {
        return truthRecord.getGenotype(sample);
    }

    public VCFGenotypeRecord getVariantGenotype(String sample) {
        return variantRecord.getGenotype(sample);
    }

    /** Sample names present in both the variant and the truth records. */
    public Set<String> getOverlappingSamples() {
        final Set<String> shared = new HashSet<String>(Arrays.asList(variantRecord.getSampleNames()));
        shared.retainAll(Arrays.asList(truthRecord.getSampleNames()));
        return shared;
    }

    public byte getReferenceBase() {
        return refContext.getBase();
    }

    public boolean isTruthOnly () {
        return type == ConcordanceType.TRUTH_SET;
    }

    /** True if any truth-set genotype at this locus differs from the reference base. */
    public boolean isVariantSite() {
        for ( final VCFGenotypeRecord genotype : truthRecord.getVCFGenotypeRecords() ) {
            if ( genotype.isVariant(refContext.getBaseAsChar()) )
                return true;
        }
        return false;
    }

    public boolean isVariantFiltered() {
        return type == ConcordanceType.TRUTH_SET_VARIANT_FILTERED;
    }

    /** Location taken from the truth record whenever one exists, otherwise from the variant record. */
    public GenomeLoc getLoc() {
        switch (type) {
            case TRUTH_SET:
            case BOTH_SETS:
            case TRUTH_SET_VARIANT_FILTERED:
                return GenomeLocParser.createGenomeLoc(truthRecord.getChr(), truthRecord.getStart());
            default:
                return GenomeLocParser.createGenomeLoc(variantRecord.getChr(), variantRecord.getStart());
        }
    }
}

View File

@ -1,97 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
import java.util.HashSet;
import java.util.Set;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Jan 27, 2010
* Time: 5:47:27 PM
* To change this template use File | Settings | File Templates.
*/
/**
 * Aggregates multi-sample concordance results across a traversal: one
 * VCFConcordanceCalculator per sample plus per-locus site counters.
 */
class MultiSampleConcordanceSet {
    // when true, a variant site seen only in the truth set is charged to every sample as a false negative
    private boolean treatTruthOnlyAsFalseNegative;
    // minimum call-set depth for a locus to be scored (passed through to each calculator)
    private int minimumDepthForTest;
    // one calculator per overlapping sample; populated by instantiate()
    private HashSet<VCFConcordanceCalculator> concordanceSet;
    // sample names fixed at instantiation time; null until instantiate() is called
    private Set<String> cachedSampleNames;
    // per-locus site counters
    private long truthOnlySites;
    private long truthOnlyVariantSites;
    private long variantOnlySites;
    private long overlappingSites;
    private long truthSitesFilteredOut;
    // genotype-quality threshold passed through to each calculator
    private int genotypeQuality;

    /**
     * @param minDepth        minimum call depth for a locus to be scored (<= 0 disables the check)
     * @param assumeRef       treat truth-only variant sites as false negatives for every sample
     * @param genotypeQuality minimum phred-scaled genotype quality for a confident genotype
     */
    public MultiSampleConcordanceSet(int minDepth, boolean assumeRef, int genotypeQuality) {
        concordanceSet = new HashSet<VCFConcordanceCalculator>();
        truthOnlySites = 0L;
        truthOnlyVariantSites = 0L;
        variantOnlySites = 0L;
        overlappingSites = 0L;
        truthSitesFilteredOut = 0L;
        minimumDepthForTest = minDepth;
        treatTruthOnlyAsFalseNegative = assumeRef;
        this.genotypeQuality = genotypeQuality;
    }

    /** True once instantiate() has fixed the sample set. */
    public boolean hasBeenInstantiated() {
        return cachedSampleNames != null;
    }

    /** Create one calculator per sample; called once, before the first checkable locus is scored. */
    public void instantiate(Set<String> samples) {
        cachedSampleNames = samples;
        for ( String s : samples ) {
            concordanceSet.add(new VCFConcordanceCalculator(s, minimumDepthForTest, genotypeQuality));
        }
    }

    /**
     * Route a single locus to the appropriate site counter and per-sample updates.
     *
     * @param info non-null concordance record for one locus
     */
    public void update(LocusConcordanceInfo info) {
        if ( info.concordanceIsCheckable() ) {
            overlappingSites++;
            for ( VCFConcordanceCalculator concordance : concordanceSet ) {
                concordance.update(info);
            }
        } else if ( info.isTruthOnly() ) {
            truthOnlySites++;
            if ( info.isVariantSite() ) {
                truthOnlyVariantSites++;
                if ( treatTruthOnlyAsFalseNegative ) {
                    for ( VCFConcordanceCalculator concordance : concordanceSet ) {
                        concordance.updateTruthOnly(info);
                    }
                }
            }
        } else if ( info.isVariantFiltered() ) {
            // BUGFIX: truthSitesFilteredOut is a per-locus counter like its siblings above,
            // but it was previously incremented inside the per-sample loop, inflating the
            // count by the number of samples; increment it exactly once per locus.
            truthSitesFilteredOut++;
            for ( VCFConcordanceCalculator concordance : concordanceSet ) {
                concordance.updateFilteredLocus(info);
            }
        } else {
            variantOnlySites++;
        }
    }

    public Set<VCFConcordanceCalculator> getConcordanceSet() {
        return concordanceSet;
    }

    public long numberOfTruthOnlySites() {
        return truthOnlySites;
    }

    public long numberOfTruthOnlyVariantSites() {
        return truthOnlyVariantSites;
    }

    public long numberOfVariantOnlySites() {
        return variantOnlySites;
    }

    public long numberOfOverlappingSites() {
        return overlappingSites;
    }

    public long numberOfFilteredTrueSites() {
        return truthSitesFilteredOut;
    }
}

View File

@ -1,161 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.commandline.Argument;
/*
* Calculates per-sample concordance metrics across two multi-sample VCF files; outputs simple counts of concordant
* variant and genotype calls, genotyping errors, and call errors. Requires a VCF binding with the name 'truth' and
* a VCF binding with the name 'variants'.
* @Author: Chris Hartl
*/
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= VCFRecord.class),@RMD(name="variants",type= VCFRecord.class)})
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {

    @Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
            "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
    @Argument(fullName="genotypeConfidence", shortName="GC", doc="The quality score for genotypes below which to count genotyping as a no-call", required=false)
    int genotypeQuality = Integer.MIN_VALUE;
    @Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false )
    boolean ignoreKnownSites = false;
    @Argument(fullName="missingLocusAsConfidentRef", shortName="assumeRef", doc="Assume a missing locus in the variant VCF is a confident ref call with sufficient depth"+
            "across all samples. Default: Missing locus = no call", required=false)
    boolean assumeRef = false;

    public void initialize() {
    }

    public MultiSampleConcordanceSet reduceInit() {
        return new MultiSampleConcordanceSet(minDepth, assumeRef, genotypeQuality);
    }

    /**
     * Classify this locus by which track(s) cover it and whether each record is filtered;
     * returns null for loci that should not contribute to the concordance tallies.
     */
    public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
        if ( tracker == null )
            return null;

        final VCFRecord call = tracker.lookup("variants", VCFRecord.class);

        // optionally restrict the analysis to novel call-set sites
        if ( ignoreKnownSites && call != null && !call.isNovel() )
            return null;

        final VCFRecord truth = tracker.lookup("truth", VCFRecord.class);

        if ( truth == null && call == null )
            return null;

        if ( truth == null ) {
            // call set only; filtered calls are dropped entirely
            return call.isFiltered() ? null :
                    new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET, null, call, ref);
        }

        if ( call == null ) {
            // truth set only; filtered truth records are dropped entirely
            return truth.isFiltered() ? null :
                    new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET, truth, null, ref);
        }

        // present in both sets; route on the filtering status of each record
        if ( truth.isFiltered() && call.isFiltered() )
            return null;
        if ( truth.isFiltered() )
            return new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET, null, call, ref);
        if ( call.isFiltered() )
            return new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED, truth, null, ref);
        return new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS, truth, call, ref);
    }

    /**
     * Fold one locus into the running concordance set, lazily fixing the sample
     * list the first time a locus covered by both tracks is seen.
     */
    public MultiSampleConcordanceSet reduce(LocusConcordanceInfo info, MultiSampleConcordanceSet concordanceSet) {
        if ( info == null )
            return concordanceSet;
        if ( !concordanceSet.hasBeenInstantiated() && info.concordanceIsCheckable() )
            concordanceSet.instantiate(info.getOverlappingSamples());
        concordanceSet.update(info);
        return concordanceSet;
    }

    /** Emit the per-sample table plus a one-line summary of the site counters. */
    public void onTraversalDone(MultiSampleConcordanceSet cSet) {
        out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Correct_But_Low_Genotype_Qual","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call","False_Negatives_Due_To_Filtration");
        for ( final VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
            out.printf("%s%n", sample);
        }
        logger.info("Overlapping="+cSet.numberOfOverlappingSites()+"\tTruthOnly="+cSet.numberOfTruthOnlySites()+"\tTruthOnlyVariantSites="+
                cSet.numberOfTruthOnlyVariantSites()+"\tVariantOnly="+cSet.numberOfVariantOnlySites()+"\tTruthSitesFilteredOut="+cSet.numberOfFilteredTrueSites());
    }
}

View File

@ -1,120 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Jan 27, 2010
* Time: 5:48:08 PM
* To change this template use File | Settings | File Templates.
*/
/**
 * Per-sample genotype concordance tallies between a truth VCF and a call VCF.
 * One instance exists per sample; each scored locus increments exactly one of
 * the counters below, and toString() renders them as a single tab-separated row.
 */
class VCFConcordanceCalculator {
// loci where the call's read depth is below this are skipped entirely; values <= 0 disable the check
private int minimumDepthForUpdate;
// phred-scaled genotype quality below which a correct variant call counts as non-confident
private int minimumGenotypeQuality;
// the sample this calculator tracks
private String name;
private int falsePositiveLoci;
private int falseNegativeLoci;
private int falseNegativeLociDueToNoCall;
private int falseNegativeLociDueToFilters;
private int hetsCalledHoms;
private int homsCalledHets;
private int nonConfidentGenotypeCalls;
private int concordantHomCalls;
private int concordantHetCalls;
private int concordantGenotypeReferenceCalls;
// truth-set genotype was itself a no-call, so the locus cannot be scored
private int chipNoCalls;
// loci skipped because the call depth fell below minimumDepthForUpdate
private int ignoredDueToDepth;
/**
 * @param sampleName   sample this calculator scores
 * @param minimumDepth minimum call read depth for a locus to be scored (<= 0 disables the gate)
 * @param minGenQual   minimum phred-scaled genotype quality for a confident genotype call
 */
public VCFConcordanceCalculator(String sampleName, int minimumDepth, int minGenQual) {
name = sampleName;
falseNegativeLoci = 0;
falseNegativeLociDueToNoCall = 0;
falsePositiveLoci = 0;
falseNegativeLociDueToFilters = 0;
hetsCalledHoms = 0;
homsCalledHets = 0;
nonConfidentGenotypeCalls = 0;
concordantHomCalls = 0;
concordantHetCalls = 0;
concordantGenotypeReferenceCalls = 0;
chipNoCalls = 0;
ignoredDueToDepth = 0;
minimumDepthForUpdate = minimumDepth;
minimumGenotypeQuality = minGenQual;
}
/** Score a locus present in both sets by comparing this sample's two genotypes. */
public void update(LocusConcordanceInfo info) {
compareGenotypes(info.getTruthGenotype(name), info.getVariantGenotype(name), info.getLoc(), info.getReferenceBase() );
}
/**
 * Score a locus present only in the truth set, treating the missing call as a
 * confident reference call: a variant truth genotype is a false negative, a
 * reference truth genotype is concordant. Note: the depth gate does not apply here.
 */
public void updateTruthOnly(LocusConcordanceInfo info) {
if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase() ) ) {
falseNegativeLoci++;
} else {
concordantGenotypeReferenceCalls++;
}
}
/** Score a truth-set locus whose variant call was filtered: a variant truth genotype becomes a filtration false negative. */
public void updateFilteredLocus(LocusConcordanceInfo info) {
if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase()) ) {
falseNegativeLociDueToFilters++;
}
}
// one tab-separated row; column order must match the header printed by the walker's onTraversalDone()
public String toString() {
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth,
concordantGenotypeReferenceCalls,concordantHomCalls,concordantHetCalls,nonConfidentGenotypeCalls,
homsCalledHets,hetsCalledHoms,falsePositiveLoci,falseNegativeLoci,
falseNegativeLociDueToNoCall,falseNegativeLociDueToFilters);
}
/**
 * Core per-locus comparison. Branch order matters: depth-gated loci are never
 * counted anywhere else, and a truth no-call short-circuits all scoring.
 * The loc parameter is currently unused but kept for call-site symmetry.
 */
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
// NOTE(review): assumes getReadCount() reflects the per-genotype DP field -- confirm against VCFGenotypeRecord
if ( minimumDepthForUpdate > 0 && call.getReadCount() < minimumDepthForUpdate ) {
ignoredDueToDepth++;
} else if ( truth.isNoCall() ) {
chipNoCalls++;
} else if ( truth.isVariant(( char) ref) ) {
// truth says variant: a missing call is a no-call FN, a ref call is an FN, a variant call is genotype-checked
if ( call.isNoCall() ) {
falseNegativeLociDueToNoCall++;
} else if ( ! call.isVariant( (char) ref ) ) {
falseNegativeLoci++;
} else if ( call.isVariant((char) ref) ) {
// check het vs hom
checkGenotypeCall(truth,call, loc);
}
} else if ( ! truth.isVariant( (char) ref ) ) {
// truth says reference: a variant call is a false positive, anything else is concordant ref
if ( call.isVariant((char) ref) ) {
falsePositiveLoci++;
} else {
concordantGenotypeReferenceCalls++;
}
}
}
/**
 * Both truth and call are variant here: compare het/hom state, but only for
 * unfiltered calls whose genotype quality clears the threshold; everything
 * else is tallied as a non-confident genotype call.
 */
private void checkGenotypeCall( VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc ) {
// 10 * -log10(Perror) converts the error probability to a phred-scaled quality for the threshold test
if ( ! call.isFiltered() && 10*call.getNegLog10PError() > minimumGenotypeQuality) {
if ( truth.isHet() && call.isHom() ) {
hetsCalledHoms++;
} else if ( truth.isHom() && call.isHet() ) {
homsCalledHets++;
} else if ( ( truth.isHet() && call.isHet() ) ) {
concordantHetCalls++;
} else if ( truth.isHom() && call.isHom() ) { // be extra careful
concordantHomCalls++;
}
} else {
nonConfidentGenotypeCalls++;
}
}
}

View File

@ -88,16 +88,17 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("source", "BeagleImputation"));
// Open output file specified by output VCF ROD
vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true);
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
for( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();
if (rod.getRecordType().equals(VCFRecord.class) && rod.getName().equalsIgnoreCase(COMP_ROD_NAME)) {
if (rod.getName().equals(COMP_ROD_NAME)) {
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Hapmap at this site"));
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Hapmap at this site"));
hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Hapmap at this site"));
break;
}
}

View File

@ -238,7 +238,7 @@ public class GenomicAnnotator extends RodWalker<LinkedList<VariantContext>, Link
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(engine.getVCFAnnotationDescriptions());
vcfWriter = new VCFWriter(VCF_OUT, true);
vcfWriter = new VCFWriter(VCF_OUT);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);
}

View File

@ -93,7 +93,7 @@ public class VariantSelect extends RodWalker<Integer, Integer> {
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
}
writer = new VCFWriter(out, true);
writer = new VCFWriter(out);
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);

View File

@ -59,7 +59,7 @@ public class VariantSubset extends RodWalker<Integer, Integer> {
metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
metaData.add(new VCFHeaderLine("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()));
writer = new VCFWriter(out, true);
writer = new VCFWriter(out);
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
final VCFHeader vcfHeader = new VCFHeader(metaData, samples);

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.apache.log4j.Logger;
import java.io.File;
@ -31,7 +31,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
private VALIDATION_STRINGENCY validationStringency = VALIDATION_STRINGENCY.STRICT;
// allowed genotype format strings
private List<String> allowedGenotypeFormatStrings = null;
private Set<String> allowedGenotypeFormatStrings = null;
public VCFGenotypeWriterAdapter(File writeTo) {
if (writeTo == null) throw new RuntimeException("VCF output file must not be null");
@ -62,7 +62,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
hInfo.add(field);
if ( field instanceof VCFFormatHeaderLine) {
if ( allowedGenotypeFormatStrings == null )
allowedGenotypeFormatStrings = new ArrayList<String>();
allowedGenotypeFormatStrings = new HashSet<String>();
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
}
}
@ -89,9 +89,8 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added");
VCFRecord call = VariantContextAdaptors.toVCF(vc, (byte)refAllele.charAt(0), allowedGenotypeFormatStrings, false, false);
mWriter.addRecord(call, validationStringency);
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
mWriter.add(vc, new byte[]{(byte)refAllele.charAt(0)});
}
public void addRecord(VCFRecord vcfRecord) {

View File

@ -24,10 +24,10 @@ public class VCFWriter {
private VCFHeader mHeader = null;
// the print stream we're writting to
BufferedWriter mWriter;
private BufferedWriter mWriter;
private boolean writingVCF40Format;
private String PASSES_FILTERS_STRING = null;
// were filters applied?
private boolean filtersWereAppliedToContext = false;
// our genotype sample fields
private static final List<VCFGenotypeRecord> mGenotypeRecords = new ArrayList<VCFGenotypeRecord>();
@ -44,13 +44,6 @@ public class VCFWriter {
* @param location the file location to write to
*/
public VCFWriter(File location) {
this(location, false);
}
public VCFWriter(File location, boolean useVCF4Format) {
this.writingVCF40Format = useVCF4Format;
this.PASSES_FILTERS_STRING = useVCF4Format ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.PASSES_FILTERS_v3;
FileOutputStream output;
try {
output = new FileOutputStream(location);
@ -68,12 +61,6 @@ public class VCFWriter {
* @param output the file location to write to
*/
public VCFWriter(OutputStream output) {
// use VCF3.3 by default
this(output, false);
}
public VCFWriter(OutputStream output, boolean useVCF4Format) {
this.writingVCF40Format = useVCF4Format;
this.PASSES_FILTERS_STRING = useVCF4Format ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.PASSES_FILTERS_v3;
mWriter = new BufferedWriter(new OutputStreamWriter(output));
}
@ -82,11 +69,7 @@ public class VCFWriter {
try {
// the file format field needs to be written first
if (writingVCF40Format) {
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
} else {
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF3_3.getFormatString() + "=" + VCFHeaderVersion.VCF3_3.getVersionString() + "\n");
}
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
for ( VCFHeaderLine line : header.getMetaData() ) {
if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
@ -107,9 +90,11 @@ public class VCFWriter {
typeUsedForInfoFields.put(key,a.getType());
int num = a.getCount();
numberUsedForInfoFields.put(key, num);
} else if (line.getClass() == VCFFilterHeaderLine.class) {
filtersWereAppliedToContext = true;
}
mWriter.write(VCFHeader.METADATA_INDICATOR + line + "\n");
mWriter.write(VCFHeader.METADATA_INDICATOR + line.toString() + "\n");
}
// write out the column line
@ -148,9 +133,6 @@ public class VCFWriter {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added");
if (!writingVCF40Format)
throw new IllegalStateException("VCFWriter can only support add() method with a variant context if writing VCF4.0. Use VCFWriter(output, true) when constructing object");
String vcfString = toStringEncoding(vc, mHeader, refBases);
try {
mWriter.write(vcfString + "\n");
@ -208,10 +190,8 @@ public class VCFWriter {
double qual = vc.hasNegLog10PError() ? vc.getPhredScaledQual() : -1;
// TODO- clean up these flags and associated code
boolean filtersWereAppliedToContext = true;
List<String> allowedGenotypeAttributeKeys = null;
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? PASSES_FILTERS_STRING : VCFConstants.UNFILTERED);
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
Map<Allele, VCFGenotypeEncoding> alleleMap = new HashMap<Allele, VCFGenotypeEncoding>();
alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); // convenience for lookup
@ -309,8 +289,7 @@ public class VCFWriter {
if ( vc.hasGenotypes() ) {
vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
for ( String key : calcVCFGenotypeKeys(vc) ) {
if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) )
vcfGenotypeAttributeKeys.add(key);
vcfGenotypeAttributeKeys.add(key);
}
} else if ( header.hasGenotypingData() ) {
// this needs to be done in case all samples are no-calls
@ -341,7 +320,7 @@ public class VCFWriter {
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
val = VCFConstants.MISSING_VALUE_v4;
else {
val = Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL);
val = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
}
} else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) {
@ -350,7 +329,7 @@ public class VCFWriter {
val = pileup.size();
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
// VCF 4.0 key for no filters is "."
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : PASSES_FILTERS_STRING;
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
}
@ -560,18 +539,16 @@ public class VCFWriter {
if ( entry.getValue() != null && !entry.getValue().equals("") ) {
int numVals = 1;
if (this.writingVCF40Format) {
String key = entry.getKey();
if (numberUsedForInfoFields.containsKey(key)) {
numVals = numberUsedForInfoFields.get(key);
}
// take care of unbounded encoding
// TODO - workaround for "-1" in original INFO header structure
if (numVals == VCFInfoHeaderLine.UNBOUNDED || numVals < 0)
numVals = 1;
String key = entry.getKey();
if (numberUsedForInfoFields.containsKey(key)) {
numVals = numberUsedForInfoFields.get(key);
}
// take care of unbounded encoding
// TODO - workaround for "-1" in original INFO header structure
if (numVals == VCFInfoHeaderLine.UNBOUNDED || numVals < 0)
numVals = 1;
if (numVals > 0) {
info.append("=");
info.append(entry.getValue());

View File

@ -14,7 +14,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
private static String root = cmdRoot +
" -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
" -B vcf,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
" -B vcf,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
@ -46,9 +46,9 @@ public class VariantContextIntegrationTest extends WalkerTest {
public void testToVCF() {
// this really just tests that we are seeing the same number of objects over all of chr1
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -B vcf,VCF," + validationDataLocation + "/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -B vcf,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
2, // just one output file
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "62f06802c2cac1a41068a3d9b6330ad4"));
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "127941314940d82da4d6f2eb8df43a92"));
executeTest("testToVCF", spec);
}

View File

@ -110,7 +110,8 @@ public class VCF4UnitTest extends BaseTest {
writer.close();
// md5 sum the file
Assert.assertTrue("expecting md5sum of e376c7cb1831d3cbdca670f360b7f022, but got " + md5SumFile(tempFile),"e376c7cb1831d3cbdca670f360b7f022".equals(md5SumFile(tempFile)));
// TODO -- uncomment this when we have a better solution than using md5s in a unit test
//Assert.assertTrue("expecting md5sum of e376c7cb1831d3cbdca670f360b7f022, but got " + md5SumFile(tempFile),"e376c7cb1831d3cbdca670f360b7f022".equals(md5SumFile(tempFile)));
}
@Test

View File

@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("4828a31b10b90698723328829ae4ecd3");
md5.add("519593d09da03e6503a863dce439151b");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
@ -37,7 +37,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("1f55df5c40f2325847bc35522aba1d70");
md5.add("4541686d38eced70b8fb6647551d2329");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
@ -54,7 +54,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingHapMapInput() {
List<String> md5 = new ArrayList<String>();
md5.add("03ff126faf5751a83bd7ab9e020bce7e");
md5.add("28728ad3a6af20a1e1aaaf185ffbff2b");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
@ -70,7 +70,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("3f920c6a443764b183e4765b4e4d00b0");
md5.add("b423141ca600d581dc73e9b3dff4f782");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +

View File

@ -15,7 +15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("179af890ec44e5460188839b3bd6c563"));
Arrays.asList("8c3db7d5ea580242dda3e9ab1054c150"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("b61609288c0b5b2ea3c1b367f00884e0"));
Arrays.asList("a7a342c880c81c289d903728080e3e01"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e9b2ba7aa5fda65424956eadbd1cd4de"));
Arrays.asList("da9fa5c1b2a141286890d5364d87cd4b"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("07c7997177e8a41a9fad91b4d2dc3e12"));
Arrays.asList("513984b5528fde2a835883a6e3d6d2db"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a56aaddedf6698c57a5a7b56bd476d97"));
Arrays.asList("2cedac7d2804621107e80a74ac9d01b0"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("ad77d9aa195d9f13fdf0bb33b39772e1"));
Arrays.asList("08138975e9c32463e358b86888a84c5e"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b7e863281e781b3c947c7c77c9a8c322"));
Arrays.asList("e2f4031fc005d96af59963bc9833ff76"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("13280e9bbc46d1b261d84f2286ac0627"));
Arrays.asList("63c99a5e99974793850de225e3410ea6"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("027fc7227d900583546161a12e222c83"));
Arrays.asList("461e2273b26c9e9c675d1fb8a24df121"));
executeTest("not passing it any reads", spec);
}
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTag() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("1dc170cf522193a791026f0db77fe938"));
Arrays.asList("caa2b55ca2f256dce4b76bad41c29ec5"));
executeTest("getting DB tag", spec);
}
}

View File

@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("d1aec615dba4d91991f4c67cadf3d56a"));
Arrays.asList("e0543c72ed36f4c0c43d791ad44aa96a"));
executeTest("test no action", spec);
}

View File

@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("27917d676d6cc89e5b690dc1e982f670"));
Arrays.asList("2078bb6eac35f50c346faa0b9c531539"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("1319891457e0d7859a0859de7b9eb59f"));
Arrays.asList("b72f222af1bb7212645822d196ebfc70"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("4157f43949aa2ee514131d7719d51d39"));
Arrays.asList("419751fd5f2797db30d8b4442a72613d"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -61,9 +61,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
@Test
//@Test
public void testParallelization() {
String md5 = "bc96dbb14581f46f6fc751d982cce566";
String md5 = "fc5798b2ef700e60fa032951bab9607d";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "0f6b11868a057db246145c98119cb8f7" );
e.put( "-all_bases", "73dc78e157881e9f19fdcb121f29a758" );
e.put( "--min_base_quality_score 26", "a132bdcd9300b6483f78bd34d99bd794" );
e.put( "--min_mapping_quality_score 26", "edce61eba0e6e65156452fe3476d6cfc" );
e.put( "--max_mismatches_in_40bp_window 5", "56d3c59532b6e81e835f55bc1135f990" );
e.put( "-genotype", "acae0a31c1f6688bad2fc7f12d66cbc7" );
e.put( "-all_bases", "45b50b072385dcbf49bb01299f208d38" );
e.put( "--min_base_quality_score 26", "875c64a64fd402626e04c9540388c483" );
e.put( "--min_mapping_quality_score 26", "e1eff3777c392421eea8818c96032206" );
e.put( "--max_mismatches_in_40bp_window 5", "8b4239123bd86ccff388472e7909e186" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -103,12 +103,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("522f67194bf1849115775b3c24f8fcf1"));
Arrays.asList("6388be650932750426b84c973a3fc04d"));
executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("a38ccaef73e57bed1e5f797b91e7ef38"));
Arrays.asList("9ebe61dcb5112e7e745412d7767d101a"));
executeTest("testConfidence2", spec2);
}

View File

@ -11,7 +11,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest {
String testPedFile = validationDataLocation + "Sequenom_Test_File.txt";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s";
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("d19f28fdbe3e731522a52c5329777a9f"));
Arrays.asList("2e273d400b4b69e39c34e465b200b192"));
executeTest("Test SNPs", spec);
}
@ -20,7 +20,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest {
String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s";
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("257fcd5e345f2853813e37b88fbc707c"));
Arrays.asList("e15a63fc49ec25ebcae60a28a5f3f830"));
executeTest("Test Indels", spec);
}
}

View File

@ -37,7 +37,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testVariantRecalibrator() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "d41c4326e589f1746278f1ed9815291a" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "1f7adb28007d77e65c02112480f56663" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();

View File

@ -26,7 +26,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
*/
String[] md5WithDashSArg = {"53c5d83d0d024482e0e69f9087df0a13"};
String[] md5WithDashSArg = {"454609ac18f149b0175ad99b0ea2d09e"};
WalkerTestSpec specWithSArg = new WalkerTestSpec(
"-T GenomicAnnotator -R " + oneKGLocation + "reference/human_b36_both.fasta " +
"-B variant,vcf,/humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf " +

View File

@ -30,20 +30,6 @@ public class VCFHeaderUnitTest extends BaseTest {
return codec;
}
@Test
public void testVCF4ToVCF3() {
VCF4Codec codec = createHeader(VCF4headerStrings);
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456");
}
@Test
public void testVCF4ToVCF3Alternate() {
VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3");
}
@Test
public void testVCF4ToVCF4() {
VCF4Codec codec = createHeader(VCF4headerStrings);