The GATK no longer writes vcf3.3; welcome to the world of vcf4.0. Needed to fix a few output bugs to get this to work, but it's looking great. Much more still to come. Guillermo: hopefully this doesn't break your local build too badly.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3786 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
530a320f28
commit
6b5c88d4d6
|
|
@ -62,9 +62,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
|
||||
// line numerical values are allowed to be unbounded (or unknown), which is
|
||||
// marked with a dot (.)
|
||||
public static int UNBOUNDED = -1; // the value we store internally for unbounded types
|
||||
public static String UNBOUNDED_ENCODING_VCF4 = "."; // the encoding for vcf 4
|
||||
public static String UNBOUNDED_ENCODING_VCF3 = "-1"; // the encoding for vcf 3
|
||||
public static int UNBOUNDED = -1; // the value we store internally for unbounded types
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
|
|
@ -104,8 +102,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
||||
name = mapping.get("ID");
|
||||
count = version == VCFHeaderVersion.VCF4_0 ?
|
||||
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
||||
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
|
||||
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
|
||||
|
|
@ -117,19 +115,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
* make a string representation of this header line
|
||||
* @return a string representation
|
||||
*/
|
||||
protected String makeStringRep() {
|
||||
if (mVersion == VCFHeaderVersion.VCF3_3 || mVersion == VCFHeaderVersion.VCF3_2)
|
||||
return String.format(lineType.toString()+"=%s,%d,%s,\"%s\"", name, count, type.toString(), description);
|
||||
else if (mVersion == VCFHeaderVersion.VCF4_0) {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
// TODO: this next line should change when we have more than two used encoding schemes
|
||||
map.put("Number", count == UNBOUNDED ? (mVersion == VCFHeaderVersion.VCF4_0 ? UNBOUNDED_ENCODING_VCF4 : UNBOUNDED_ENCODING_VCF3) : count);
|
||||
map.put("Type", type);
|
||||
map.put("Description", description);
|
||||
return lineType.toString() + "=" + VCFHeaderLineTranslator.toValue(this.mVersion,map);
|
||||
}
|
||||
else throw new RuntimeException("Unsupported VCFVersion " + mVersion);
|
||||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
|
||||
map.put("Type", type);
|
||||
map.put("Description", description);
|
||||
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -71,6 +71,8 @@ public final class VCFConstants {
|
|||
public static final String MISSING_GENOTYPE_QUALITY_v3 = "-1";
|
||||
public static final String MISSING_HAPLOTYPE_QUALITY_v3 = "-1";
|
||||
public static final String MISSING_DEPTH_v3 = "-1";
|
||||
public static final String UNBOUNDED_ENCODING_v4 = ".";
|
||||
public static final String UNBOUNDED_ENCODING_v3 = "-1";
|
||||
public static final String EMPTY_ALLELE = ".";
|
||||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final double MAX_GENOTYPE_QUAL = 99.0;
|
||||
|
|
|
|||
|
|
@ -40,14 +40,14 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
|
|||
description = mapping.get("Description");
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
protected String toStringEncoding() {
|
||||
if (mVersion == VCFHeaderVersion.VCF3_3 || mVersion == VCFHeaderVersion.VCF3_2)
|
||||
return String.format("FILTER=%s,\"%s\"", name, description);
|
||||
else if (mVersion == VCFHeaderVersion.VCF4_0) {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Description", description);
|
||||
return "FILTER=" + VCFHeaderLineTranslator.toValue(this.mVersion,map);
|
||||
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
else throw new RuntimeException("Unsupported VCFVersion " + mVersion);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broad.tribble.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -80,11 +108,11 @@ public class VCFHeaderLine implements Comparable {
|
|||
|
||||
public String toString() {
|
||||
if ( stringRep == null )
|
||||
stringRep = makeStringRep();
|
||||
stringRep = toStringEncoding();
|
||||
return stringRep;
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
protected String toStringEncoding() {
|
||||
return mKey + "=" + mValue;
|
||||
}
|
||||
|
||||
|
|
@ -106,4 +134,29 @@ public class VCFHeaderLine implements Comparable {
|
|||
if (!version.equals(this.mVersion)) this.stringRep = null;
|
||||
this.mVersion = version;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a string of a mapping pair for the target VCF version
|
||||
* @param keyValues a mapping of the key->value pairs to output
|
||||
* @return a string, correctly formatted
|
||||
*/
|
||||
public static String toStringEncoding(Map<String, ? extends Object> keyValues) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("<");
|
||||
boolean start = true;
|
||||
for (Map.Entry<String,?> entry : keyValues.entrySet()) {
|
||||
if (start) start = false;
|
||||
else builder.append(",");
|
||||
|
||||
if ( entry.getValue() == null ) throw new StingException("Header problem: unbound value at " + entry + " from " + keyValues);
|
||||
|
||||
builder.append(entry.getKey());
|
||||
builder.append("=");
|
||||
builder.append(entry.getValue().toString().contains(",") ||
|
||||
entry.getValue().toString().contains(" ") ||
|
||||
entry.getKey().equals("Description") ? "\""+ entry.getValue() + "\"" : entry.getValue());
|
||||
}
|
||||
builder.append(">");
|
||||
return builder.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,5 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -19,15 +17,10 @@ public class VCFHeaderLineTranslator {
|
|||
public static Map<String,String> parseLine(VCFHeaderVersion version, String valueLine, List<String> expectedTagOrder) {
|
||||
return mapping.get(version).parseLine(valueLine,expectedTagOrder);
|
||||
}
|
||||
|
||||
public static String toValue(VCFHeaderVersion version, Map<String,Object> keyValues) {
|
||||
return mapping.get(version).toValue(keyValues);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
interface VCFLineParser {
|
||||
public String toValue(Map<String,? extends Object> keyValues);
|
||||
public Map<String,String> parseLine(String valueLine, List<String> expectedTagOrder);
|
||||
}
|
||||
|
||||
|
|
@ -38,31 +31,6 @@ interface VCFLineParser {
|
|||
class VCF4Parser implements VCFLineParser {
|
||||
Set<String> bracketed = new HashSet<String>();
|
||||
|
||||
/**
|
||||
* create a string of a mapping pair for the target VCF version
|
||||
* @param keyValues a mapping of the key->value pairs to output
|
||||
* @return a string, correctly formatted
|
||||
*/
|
||||
public String toValue(Map<String, ? extends Object> keyValues) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("<");
|
||||
boolean start = true;
|
||||
for (Map.Entry<String,?> entry : keyValues.entrySet()) {
|
||||
if (start) start = false;
|
||||
else builder.append(",");
|
||||
|
||||
if ( entry.getValue() == null ) throw new StingException("Header problem: unbound value at " + entry + " from " + keyValues);
|
||||
|
||||
builder.append(entry.getKey());
|
||||
builder.append("=");
|
||||
builder.append(entry.getValue().toString().contains(",") ||
|
||||
entry.getValue().toString().contains(" ") ||
|
||||
entry.getKey().equals("Description") ? "\""+ entry.getValue() + "\"" : entry.getValue());
|
||||
}
|
||||
builder.append(">");
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* parse a VCF4 line
|
||||
* @param valueLine the line
|
||||
|
|
@ -110,17 +78,6 @@ class VCF4Parser implements VCFLineParser {
|
|||
|
||||
class VCF3Parser implements VCFLineParser {
|
||||
|
||||
public String toValue(Map<String, ? extends Object> keyValues) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
boolean start = true;
|
||||
for (Map.Entry<String,?> entry : keyValues.entrySet()) {
|
||||
if (start) start = false;
|
||||
else builder.append(",");
|
||||
builder.append(entry.getValue().toString().contains(",") || entry.getValue().toString().contains(" ")? "\""+ entry.getValue() + "\"" : entry.getValue());
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) {
|
||||
// our return map
|
||||
Map<String, String> ret = new LinkedHashMap<String, String>();
|
||||
|
|
@ -128,9 +85,6 @@ class VCF3Parser implements VCFLineParser {
|
|||
// a builder to store up characters as we go
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
// store the key when we're parsing out the values
|
||||
String key = "";
|
||||
|
||||
// where are we in the stream of characters?
|
||||
int index = 0;
|
||||
// where in the expected tag order are we?
|
||||
|
|
|
|||
|
|
@ -429,11 +429,44 @@ public class VariantContextUtils {
|
|||
return uniqify ? sampleName + "." + trackName : sampleName;
|
||||
}
|
||||
|
||||
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
|
||||
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes);
|
||||
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
|
||||
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes());
|
||||
}
|
||||
|
||||
public static VariantContext modifyLocation(VariantContext vc, GenomeLoc loc) {
|
||||
return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes());
|
||||
}
|
||||
|
||||
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
|
||||
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
}
|
||||
|
||||
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
|
||||
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes);
|
||||
}
|
||||
|
||||
public static Genotype modifyName(Genotype g, String name) {
|
||||
return new Genotype(name, g.getAlleles(), g.getNegLog10PError(), g.getFilters(), g.getAttributes(), g.genotypesArePhased());
|
||||
}
|
||||
|
||||
public static Genotype modifyAttributes(Genotype g, Map<String, Object> attributes) {
|
||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(), attributes, g.genotypesArePhased());
|
||||
}
|
||||
|
||||
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
|
||||
if ( allowedAttributes == null )
|
||||
return vc;
|
||||
|
||||
Map<String, Genotype> newGenotypes = new HashMap<String, Genotype>(vc.getNSamples());
|
||||
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||
for ( Map.Entry<String, Object> attr : genotype.getValue().getAttributes().entrySet() ) {
|
||||
if ( allowedAttributes.contains(attr.getKey()) )
|
||||
attrs.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
newGenotypes.put(genotype.getKey(), VariantContextUtils.modifyAttributes(genotype.getValue(), attrs));
|
||||
}
|
||||
|
||||
return VariantContextUtils.modifyGenotypes(vc, newGenotypes);
|
||||
}
|
||||
}
|
||||
|
|
@ -251,10 +251,10 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
|
||||
public static VCFRecord toVCF(VariantContext vc, byte vcfRefBase) {
|
||||
return toVCF(vc, vcfRefBase, null, true, false);
|
||||
}
|
||||
List<String> allowedGenotypeAttributeKeys = null;
|
||||
boolean filtersWereAppliedToContext = true;
|
||||
boolean filtersWereAppliedToGenotypes = false;
|
||||
|
||||
public static VCFRecord toVCF(VariantContext vc, byte vcfRefBase, List<String> allowedGenotypeAttributeKeys, boolean filtersWereAppliedToContext, boolean filtersWereAppliedToGenotypes) {
|
||||
// deal with the reference
|
||||
String referenceBases = new String(vc.getReference().getBases());
|
||||
|
||||
|
|
|
|||
|
|
@ -30,11 +30,14 @@ import org.broad.tribble.vcf.*;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -52,11 +55,11 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
private VCFWriter vcfwriter = null;
|
||||
|
||||
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
|
||||
|
||||
// Don't allow mixed types for now
|
||||
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL);
|
||||
|
||||
private String[] ALLOWED_FORMAT_FIELDS = {VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_LIKELIHOODS_KEY };
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
return 0;
|
||||
|
|
@ -66,19 +69,26 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
Collection<VariantContext> contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false);
|
||||
|
||||
for ( VariantContext vc : contexts ) {
|
||||
VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false);
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||
if ( dbsnp != null )
|
||||
vcf.setID(dbsnp.getRsID());
|
||||
attrs.put("ID", dbsnp.getRsID());
|
||||
vc = VariantContextUtils.modifyAttributes(vc, attrs);
|
||||
|
||||
// set the appropriate sample name if necessary
|
||||
if ( sampleName != null && vcf.hasGenotypeData() && vcf.getGenotype(INPUT_ROD_NAME) != null )
|
||||
vcf.getGenotype(INPUT_ROD_NAME).setSampleName(sampleName);
|
||||
writeRecord(vcf, tracker);
|
||||
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(INPUT_ROD_NAME) ) {
|
||||
Genotype g = VariantContextUtils.modifyName(vc.getGenotype(INPUT_ROD_NAME), sampleName);
|
||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
||||
genotypes.put(sampleName, g);
|
||||
vc = VariantContextUtils.modifyGenotypes(vc, genotypes);
|
||||
}
|
||||
|
||||
writeRecord(vc, tracker, ref.getBase());
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
private void writeRecord(VCFRecord rec, RefMetaDataTracker tracker) {
|
||||
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, byte ref) {
|
||||
if ( vcfwriter == null ) {
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
|
|
@ -86,28 +96,39 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
TreeSet<String> samples = new TreeSet<String>();
|
||||
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( VCFHeaderLine field : hInfo ) {
|
||||
if ( field instanceof VCFFormatHeaderLine) {
|
||||
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
|
||||
}
|
||||
}
|
||||
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
if ( sampleName != null ) {
|
||||
samples.add(sampleName);
|
||||
} else {
|
||||
// try VCF first
|
||||
samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME));
|
||||
|
||||
List<Object> rods = tracker.getReferenceMetaData(INPUT_ROD_NAME);
|
||||
if ( rods.size() == 0 )
|
||||
throw new IllegalStateException("VCF record was created, but no rod data is present");
|
||||
if ( samples.isEmpty() ) {
|
||||
List<Object> rods = tracker.getReferenceMetaData(INPUT_ROD_NAME);
|
||||
if ( rods.size() == 0 )
|
||||
throw new IllegalStateException("No rod data is present");
|
||||
|
||||
Object rod = rods.get(0);
|
||||
if ( rod instanceof VCFRecord )
|
||||
samples.addAll(Arrays.asList(((VCFRecord)rod).getSampleNames()));
|
||||
else if ( rod instanceof HapMapROD )
|
||||
samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs()));
|
||||
else
|
||||
samples.addAll(Arrays.asList(rec.getSampleNames()));
|
||||
Object rod = rods.get(0);
|
||||
if ( rod instanceof HapMapROD )
|
||||
samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs()));
|
||||
else
|
||||
samples.addAll(vc.getSampleNames());
|
||||
}
|
||||
}
|
||||
|
||||
vcfwriter = new VCFWriter(out);
|
||||
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
|
||||
}
|
||||
vcfwriter.addRecord(rec);
|
||||
|
||||
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
||||
vcfwriter.add(vc, new byte[]{ref});
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import java.util.*;
|
|||
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
||||
|
||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency"),
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||
|
||||
|
|
|
|||
|
|
@ -27,13 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
|
||||
|
|
@ -139,10 +137,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
|
||||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||
|
||||
vcfWriter = new VCFWriter(out, true);
|
||||
vcfWriter = new VCFWriter(out);
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
|
||||
|
|
@ -185,13 +182,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
List<Object> rods = tracker.getReferenceMetaData("variant");
|
||||
// ignore places where we don't have a variant
|
||||
if ( rods.size() == 0 )
|
||||
return 0;
|
||||
|
||||
Object variant = rods.get(0);
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("variant", variant, ref);
|
||||
VariantContext vc = tracker.getVariantContext(ref, "variant", null, context.getLocation(), true);
|
||||
if ( vc == null )
|
||||
return 0;
|
||||
|
||||
|
|
@ -210,17 +201,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
if ( ! indelsOnly ) {
|
||||
if ( variant instanceof VCFRecord ) {
|
||||
for(VariantContext annotatedVC : annotatedVCs ) {
|
||||
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
|
||||
}
|
||||
}
|
||||
for ( VariantContext annotatedVC : annotatedVCs )
|
||||
vcfWriter.add(annotatedVC, new byte[]{ref.getBase()});
|
||||
} else {
|
||||
// check to see if the buffered context is different (in location) this context
|
||||
if ( indelBufferContext != null && ! indelBufferContext.iterator().next().getLocation().equals(annotatedVCs.iterator().next().getLocation()) ) {
|
||||
for(VariantContext annotatedVC : indelBufferContext ) {
|
||||
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
|
||||
}
|
||||
for ( VariantContext annotatedVC : indelBufferContext )
|
||||
vcfWriter.add(annotatedVC, new byte[]{ref.getBase()});
|
||||
indelBufferContext = annotatedVCs;
|
||||
} else {
|
||||
indelBufferContext = annotatedVCs;
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
writer = new VCFWriter(out, true);
|
||||
writer = new VCFWriter(out);
|
||||
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(vc.getSampleNames())));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ import java.util.*;
|
|||
*/
|
||||
@Reference(window=@Window(start=0,stop=40))
|
||||
@Requires(value={},referenceMetaData=@RMD(name="sequenom",type= ReferenceOrderedDatum.class))
|
||||
public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
||||
public class SequenomValidationConverter extends RodWalker<Pair<VariantContext, Byte>,Integer> {
|
||||
@Argument(fullName="maxHardy", doc="Maximum phred-scaled Hardy-Weinberg violation pvalue to consider an assay valid [default:20]", required=false)
|
||||
protected double maxHardy = 20.0;
|
||||
@Argument(fullName="maxNoCall", doc="Maximum no-call rate (as a fraction) to consider an assay valid [default:0.05]", required=false)
|
||||
|
|
@ -63,7 +64,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
private TreeSet<String> sampleNames = null;
|
||||
|
||||
// vcf records
|
||||
private ArrayList<VCFRecord> records = new ArrayList<VCFRecord>();
|
||||
private ArrayList<Pair<VariantContext, Byte>> records = new ArrayList<Pair<VariantContext, Byte>>();
|
||||
|
||||
// statistics
|
||||
private int numRecords = 0;
|
||||
|
|
@ -85,7 +86,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
return numberOfVariantsProcessed;
|
||||
}
|
||||
|
||||
public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public Pair<VariantContext, Byte> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
|
|
@ -105,7 +106,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
return addVariantInformationToCall(ref, vc, rod);
|
||||
}
|
||||
|
||||
public Integer reduce(VCFRecord call, Integer numVariants) {
|
||||
public Integer reduce(Pair<VariantContext, Byte> call, Integer numVariants) {
|
||||
if ( call != null ) {
|
||||
numVariants++;
|
||||
records.add(call);
|
||||
|
|
@ -156,16 +157,13 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
VCFHeader header = new VCFHeader(hInfo, sampleNames);
|
||||
vcfWriter.writeHeader(header);
|
||||
|
||||
for ( VCFRecord record : records )
|
||||
vcfWriter.addRecord(record);
|
||||
for ( Pair<VariantContext, Byte> record : records )
|
||||
vcfWriter.add(record.first, new byte[]{record.second});
|
||||
vcfWriter.close();
|
||||
}
|
||||
|
||||
|
||||
private VCFRecord addVariantInformationToCall(ReferenceContext ref, VariantContext vContext, Object rod) {
|
||||
|
||||
VCFRecord record = VariantContextAdaptors.toVCF(vContext, ref.getBase());
|
||||
record.setGenotypeFormatString("GT");
|
||||
private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext, Object rod) {
|
||||
|
||||
// check possible filters
|
||||
double hwPvalue = hardyWeinbergCalculation(vContext);
|
||||
|
|
@ -176,23 +174,25 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
double homVarProp = (double)vContext.getHomVarCount() / (double)vContext.getNSamples();
|
||||
|
||||
boolean isViolation = false;
|
||||
Set<String> filters = new HashSet<String>();
|
||||
if ( noCallProp > maxNoCall ) {
|
||||
record.setFilterString("HighNoCallRate");
|
||||
filters.add("HighNoCallRate");
|
||||
numNoCallViolations++;
|
||||
isViolation = true;
|
||||
} else if ( hwScore > maxHardy ) {
|
||||
record.setFilterString("HardyWeinbergViolation");
|
||||
filters.add("HardyWeinbergViolation");
|
||||
numHWViolations++;
|
||||
isViolation = true;
|
||||
} else if ( homVarProp > maxHomNonref) {
|
||||
record.setFilterString("TooManyHomVars");
|
||||
filters.add("TooManyHomVars");
|
||||
numHomVarViolations++;
|
||||
isViolation = true;
|
||||
}
|
||||
vContext = VariantContextUtils.modifyFilters(vContext, filters);
|
||||
numRecords++;
|
||||
|
||||
// add the info fields
|
||||
HashMap<String, String> infoMap = new HashMap<String,String>(5);
|
||||
HashMap<String, Object> infoMap = new HashMap<String, Object>();
|
||||
infoMap.put("NoCallPct", String.format("%.1f", 100.0*noCallProp));
|
||||
infoMap.put("HomRefPct", String.format("%.1f", 100.0*homRefProp));
|
||||
infoMap.put("HomVarPct", String.format("%.1f", 100.0*homVarProp));
|
||||
|
|
@ -204,13 +204,14 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
numTrueVariants++;
|
||||
infoMap.put(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount));
|
||||
infoMap.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount()));
|
||||
record.addInfoFields(infoMap);
|
||||
|
||||
// set the id if it's a plink rod
|
||||
if ( rod instanceof PlinkRod )
|
||||
record.setID(((PlinkRod)rod).getVariantName());
|
||||
infoMap.put("ID", ((PlinkRod)rod).getVariantName());
|
||||
|
||||
return record;
|
||||
vContext = VariantContextUtils.modifyAttributes(vContext, infoMap);
|
||||
|
||||
return new Pair<VariantContext, Byte>(vContext, ref.getBase());
|
||||
}
|
||||
|
||||
private double hardyWeinbergCalculation(VariantContext vc) {
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
|
@ -73,7 +72,6 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
|||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
private VCFWriter vcfWriter;
|
||||
private final ArrayList<String> ALLOWED_FORMAT_FIELDS = new ArrayList<String>();
|
||||
final ExpandingArrayList<Double> qCuts = new ExpandingArrayList<Double>();
|
||||
final ExpandingArrayList<String> filterName = new ExpandingArrayList<String>();
|
||||
|
||||
|
|
@ -101,11 +99,6 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
|||
throw new StingException("Can not find input file: " + TRANCHE_FILENAME);
|
||||
}
|
||||
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_KEY); // copied from VariantsToVCF
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.DEPTH_KEY);
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
|
||||
|
||||
// setup the header fields
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
|
@ -145,29 +138,31 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||
for( VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||
if( vc != null && !vc.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
|
||||
final VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), ALLOWED_FORMAT_FIELDS, false, false);
|
||||
String filterString = null;
|
||||
if( !vc.isFiltered() ) {
|
||||
final double qual = vc.getPhredScaledQual();
|
||||
boolean setFilter = false;
|
||||
for( int tranche = qCuts.size() - 1; tranche >= 0; tranche-- ) {
|
||||
if( qual >= qCuts.get(tranche) ) {
|
||||
if(tranche == qCuts.size() - 1) {
|
||||
vcf.setFilterString(VCFConstants.PASSES_FILTERS_v3);
|
||||
setFilter = true;
|
||||
filterString = VCFConstants.PASSES_FILTERS_v4;
|
||||
} else {
|
||||
vcf.setFilterString(filterName.get(tranche));
|
||||
setFilter = true;
|
||||
filterString = filterName.get(tranche);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !setFilter ) {
|
||||
vcf.setFilterString(filterName.get(0)+"+");
|
||||
if( filterString == null )
|
||||
filterString = filterName.get(0)+"+";
|
||||
|
||||
if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.add(filterString);
|
||||
vc = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
}
|
||||
}
|
||||
vcfWriter.addRecord( vcf );
|
||||
vcfWriter.add( vc, new byte[]{ref.getBase()} );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -189,7 +184,7 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
public void onTraversalDone( ExpandingArrayList<VariantDatum> reduceSum ) {
|
||||
public void onTraversalDone( Integer reduceSum ) {
|
||||
vcfWriter.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
|
@ -97,7 +96,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
private VariantGaussianMixtureModel theModel = null;
|
||||
private VCFWriter vcfWriter;
|
||||
private Set<String> ignoreInputFilterSet = null;
|
||||
private final ArrayList<String> ALLOWED_FORMAT_FIELDS = new ArrayList<String>();
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -123,11 +121,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
throw new StingException( "Variant Optimization Model is unrecognized. Implemented options are GAUSSIAN_MIXTURE_MODEL and K_NEAREST_NEIGHBORS" );
|
||||
}
|
||||
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_KEY); // copied from VariantsToVCF
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.DEPTH_KEY);
|
||||
ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
|
||||
|
||||
// setup the header fields
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
final TreeSet<String> samples = new TreeSet<String>();
|
||||
|
|
@ -180,7 +173,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||
if( vc != null && !vc.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
|
||||
final VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), ALLOWED_FORMAT_FIELDS, false, false);
|
||||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
final VariantDatum variantDatum = new VariantDatum();
|
||||
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||
|
|
@ -196,13 +188,16 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
variantDatum.qual = QUALITY_SCALE_FACTOR * QualityUtils.phredScaleErrorRate( Math.max(1.0 - pTrue, 0.000000001) ); // BUGBUG: don't have a normalizing constant, so need to scale up qual scores arbitrarily
|
||||
mapList.add( variantDatum );
|
||||
|
||||
vcf.addInfoField("OQ", String.format("%.2f", ((Double)vc.getPhredScaledQual())));
|
||||
vcf.setQual( variantDatum.qual );
|
||||
vcf.setFilterString(VCFConstants.PASSES_FILTERS_v3);
|
||||
vcfWriter.addRecord( vcf );
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||
attrs.put("OQ", String.format("%.2f", ((Double)vc.getPhredScaledQual())));
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.add(VCFConstants.PASSES_FILTERS_v4);
|
||||
VariantContext newVC = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), variantDatum.qual / 10.0, filters, attrs);
|
||||
|
||||
vcfWriter.add( newVC, new byte[]{ref.getBase()} );
|
||||
|
||||
} else { // not a SNP or is filtered so just dump it out to the VCF file
|
||||
vcfWriter.addRecord( vcf );
|
||||
vcfWriter.add( vc, new byte[]{ref.getBase()} );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
private List<String> priority = null;
|
||||
|
||||
public void initialize() {
|
||||
vcfWriter = new VCFWriter(out, true);
|
||||
vcfWriter = new VCFWriter(out);
|
||||
validateAnnotateUnionArguments();
|
||||
|
||||
Map<String, VCFHeader> vcfRods = SampleUtils.getVCFHeadersFromRods(getToolkit(), null);
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
|
|||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
writer = new VCFWriter(out, true);
|
||||
writer = new VCFWriter(out);
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
||||
writer.writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
writer = new VCFWriter(out, true);
|
||||
writer = new VCFWriter(out);
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
||||
writer.writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
|||
wroteHeader = true;
|
||||
}
|
||||
|
||||
writer.addRecord(VariantContextAdaptors.toVCF(vc, ref.getBase()));
|
||||
writer.add(vc, new byte[]{ref.getBase()});
|
||||
}
|
||||
|
||||
n++;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.util.AsciiLineReader;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -9,11 +8,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
|
@ -45,19 +47,6 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
|
||||
* of the given ROD.
|
||||
|
|
@ -92,7 +81,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
|
|||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
||||
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true);
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
|
||||
VCFHeader header = null;
|
||||
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
final RMDTrack rod = source.getReferenceOrderedData();
|
||||
|
|
@ -120,7 +109,8 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
header.setVersion(VCFHeaderVersion.VCF4_0);
|
||||
if ( header != null )
|
||||
header.setVersion(VCFHeaderVersion.VCF4_0);
|
||||
|
||||
vcfWriter.writeHeader(header);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,86 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Jan 27, 2010
|
||||
* Time: 5:48:36 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class LocusConcordanceInfo {
|
||||
|
||||
public enum ConcordanceType {
|
||||
TRUTH_SET,TRUTH_SET_VARIANT_FILTERED,VARIANT_SET,BOTH_SETS
|
||||
}
|
||||
|
||||
private ConcordanceType concordanceType;
|
||||
private VCFRecord variantVCFRecord;
|
||||
private VCFRecord truthVCFRecord;
|
||||
private ReferenceContext reference;
|
||||
|
||||
public LocusConcordanceInfo(ConcordanceType type, VCFRecord truthRecord, VCFRecord variantRecord, ReferenceContext ref) {
|
||||
concordanceType = type;
|
||||
variantVCFRecord = variantRecord;
|
||||
truthVCFRecord = truthRecord;
|
||||
reference = ref;
|
||||
}
|
||||
|
||||
public boolean concordanceIsCheckable() {
|
||||
return concordanceType == ConcordanceType.BOTH_SETS;
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord getTruthGenotype(String sample) {
|
||||
return truthVCFRecord.getGenotype(sample);
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord getVariantGenotype(String sample) {
|
||||
return variantVCFRecord.getGenotype(sample);
|
||||
}
|
||||
|
||||
public Set<String> getOverlappingSamples() {
|
||||
Set<String> variantSamples = new HashSet<String>( Arrays.asList(variantVCFRecord.getSampleNames()) );
|
||||
variantSamples.retainAll(Arrays.asList(truthVCFRecord.getSampleNames()));
|
||||
return variantSamples;
|
||||
}
|
||||
|
||||
public byte getReferenceBase() {
|
||||
return reference.getBase();
|
||||
}
|
||||
|
||||
public boolean isTruthOnly () {
|
||||
return concordanceType == ConcordanceType.TRUTH_SET;
|
||||
}
|
||||
|
||||
public boolean isVariantSite() {
|
||||
for ( VCFGenotypeRecord g : truthVCFRecord.getVCFGenotypeRecords() ) {
|
||||
if ( g.isVariant(reference.getBaseAsChar()) ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isVariantFiltered() {
|
||||
return this.concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED;
|
||||
}
|
||||
|
||||
public GenomeLoc getLoc() {
|
||||
if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS || concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED) {
|
||||
return GenomeLocParser.createGenomeLoc(truthVCFRecord.getChr(),truthVCFRecord.getStart());
|
||||
} else {
|
||||
return GenomeLocParser.createGenomeLoc( variantVCFRecord.getChr(),variantVCFRecord.getStart());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Jan 27, 2010
|
||||
* Time: 5:47:27 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class MultiSampleConcordanceSet {
|
||||
private boolean treatTruthOnlyAsFalseNegative;
|
||||
private int minimumDepthForTest;
|
||||
private HashSet<VCFConcordanceCalculator> concordanceSet;
|
||||
private Set<String> cachedSampleNames;
|
||||
private long truthOnlySites;
|
||||
private long truthOnlyVariantSites;
|
||||
private long variantOnlySites;
|
||||
private long overlappingSites;
|
||||
private long truthSitesFilteredOut;
|
||||
private int genotypeQuality;
|
||||
|
||||
public MultiSampleConcordanceSet(int minDepth, boolean assumeRef, int genotypeQuality) {
|
||||
concordanceSet = new HashSet<VCFConcordanceCalculator>();
|
||||
truthOnlySites = 0l;
|
||||
truthOnlyVariantSites = 0l;
|
||||
variantOnlySites = 0l;
|
||||
overlappingSites = 0l;
|
||||
truthSitesFilteredOut = 0l;
|
||||
minimumDepthForTest = minDepth;
|
||||
treatTruthOnlyAsFalseNegative = assumeRef;
|
||||
this.genotypeQuality = genotypeQuality;
|
||||
}
|
||||
|
||||
public boolean hasBeenInstantiated() {
|
||||
return cachedSampleNames != null;
|
||||
}
|
||||
|
||||
public void instantiate(Set<String> samples) {
|
||||
cachedSampleNames = samples;
|
||||
for ( String s : samples ) {
|
||||
concordanceSet.add(new VCFConcordanceCalculator(s,minimumDepthForTest,genotypeQuality));
|
||||
}
|
||||
}
|
||||
|
||||
public void update(LocusConcordanceInfo info) {
|
||||
if ( info.concordanceIsCheckable() ) {
|
||||
overlappingSites++;
|
||||
for ( VCFConcordanceCalculator concordance : concordanceSet ) {
|
||||
concordance.update(info);
|
||||
}
|
||||
} else if ( info.isTruthOnly() ) {
|
||||
truthOnlySites++;
|
||||
if ( info.isVariantSite() ) {
|
||||
truthOnlyVariantSites++;
|
||||
if ( treatTruthOnlyAsFalseNegative ) {
|
||||
for ( VCFConcordanceCalculator concordance : concordanceSet ) {
|
||||
concordance.updateTruthOnly(info);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ( info.isVariantFiltered() ) {
|
||||
for ( VCFConcordanceCalculator concordance : concordanceSet ) {
|
||||
concordance.updateFilteredLocus(info);
|
||||
truthSitesFilteredOut++;
|
||||
}
|
||||
} else{
|
||||
variantOnlySites++;
|
||||
}
|
||||
}
|
||||
|
||||
public Set<VCFConcordanceCalculator> getConcordanceSet() {
|
||||
return concordanceSet;
|
||||
}
|
||||
|
||||
public long numberOfTruthOnlySites() {
|
||||
return truthOnlySites;
|
||||
}
|
||||
|
||||
public long numberOfTruthOnlyVariantSites() {
|
||||
return truthOnlyVariantSites;
|
||||
}
|
||||
|
||||
public long numberOfVariantOnlySites() {
|
||||
return variantOnlySites;
|
||||
}
|
||||
|
||||
public long numberOfOverlappingSites() {
|
||||
return overlappingSites;
|
||||
}
|
||||
|
||||
public long numberOfFilteredTrueSites() {
|
||||
return truthSitesFilteredOut;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,161 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
|
||||
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
/*
|
||||
* Calculates per-sample concordance metrics across two multi-sample VCF files; outputs simple counts of concordant
|
||||
* variant and genotype calls, genotyping errors, and call errors. Requires a VCF binding with the name 'truth' and
|
||||
* a VCF binding with the name 'variants'.
|
||||
* @Author: Chris Hartl
|
||||
*/
|
||||
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= VCFRecord.class),@RMD(name="variants",type= VCFRecord.class)})
|
||||
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
|
||||
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
|
||||
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
|
||||
@Argument(fullName="genotypeConfidence", shortName="GC", doc="The quality score for genotypes below which to count genotyping as a no-call", required=false)
|
||||
int genotypeQuality = Integer.MIN_VALUE;
|
||||
@Argument(fullName = "ignoreKnownSites", shortName = "novel", doc="Only run concordance over novel sites (sites marked in the VCF as being in dbSNP or Hapmap 2 or 3)", required=false )
|
||||
boolean ignoreKnownSites = false;
|
||||
@Argument(fullName="missingLocusAsConfidentRef", shortName="assumeRef", doc="Assume a missing locus in the variant VCF is a confident ref call with sufficient depth"+
|
||||
"across all samples. Default: Missing locus = no call", required=false)
|
||||
boolean assumeRef = false;
|
||||
|
||||
public void initialize() {
|
||||
|
||||
}
|
||||
|
||||
public MultiSampleConcordanceSet reduceInit() {
|
||||
return new MultiSampleConcordanceSet(minDepth,assumeRef,genotypeQuality);
|
||||
}
|
||||
|
||||
public LocusConcordanceInfo map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext c) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
VCFRecord variantData = tracker.lookup("variants", VCFRecord.class);
|
||||
if ( ignoreKnownSites ) { // ignoreKnownSites && tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) )
|
||||
if ( variantData != null && ! variantData.isNovel() ) {
|
||||
//logger.info("Not novel: "+( (RodVCF) tracker.lookup("variants",null)).getID());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
VCFRecord truthData = tracker.lookup("truth",VCFRecord.class);
|
||||
LocusConcordanceInfo concordance;
|
||||
|
||||
if ( truthData == null && variantData == null) {
|
||||
|
||||
concordance = null;
|
||||
|
||||
} else if ( truthData == null ) {
|
||||
|
||||
// not in the truth set
|
||||
if ( variantData.isFiltered() ) {
|
||||
|
||||
concordance = null;
|
||||
|
||||
} else {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref);
|
||||
}
|
||||
|
||||
} else if ( variantData == null ) {
|
||||
|
||||
// not in the variant set
|
||||
if ( (truthData).isFiltered() ) {
|
||||
|
||||
concordance = null;
|
||||
|
||||
} else {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,truthData,null,ref);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
// in both
|
||||
// check for filtering
|
||||
boolean truth_filter = truthData.isFiltered();
|
||||
boolean call_filter = variantData.isFiltered();
|
||||
|
||||
if ( truth_filter && call_filter ) {
|
||||
|
||||
concordance = null;
|
||||
|
||||
} else if ( truth_filter ) {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref);
|
||||
|
||||
} else if ( call_filter ) {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,truthData, null ,ref);
|
||||
|
||||
} else {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,truthData,variantData,ref);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return concordance;
|
||||
}
|
||||
|
||||
public MultiSampleConcordanceSet reduce(LocusConcordanceInfo info, MultiSampleConcordanceSet concordanceSet) {
|
||||
if ( info != null ) {
|
||||
if ( concordanceSet.hasBeenInstantiated() ) {
|
||||
concordanceSet.update(info);
|
||||
} else if ( info.concordanceIsCheckable() ) {
|
||||
concordanceSet.instantiate(info.getOverlappingSamples());
|
||||
concordanceSet.update(info);
|
||||
} else {
|
||||
concordanceSet.update(info);
|
||||
}
|
||||
}
|
||||
|
||||
return concordanceSet;
|
||||
}
|
||||
|
||||
public void onTraversalDone(MultiSampleConcordanceSet cSet) {
|
||||
out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Sample_ID","Ignored_due_to_depth","Concordant_Refs","Concordant_Homs","Concordant_Hets","Correct_But_Low_Genotype_Qual","Homs_called_het","Het_called_homs","False_Positives","False_Negatives_Due_To_Ref_Call","False_Negatives_Due_To_No_Call","False_Negatives_Due_To_Filtration");
|
||||
for ( VCFConcordanceCalculator sample : cSet.getConcordanceSet() ) {
|
||||
out.print(String.format("%s%n",sample));
|
||||
}
|
||||
logger.info("Overlapping="+cSet.numberOfOverlappingSites()+"\tTruthOnly="+cSet.numberOfTruthOnlySites()+"\tTruthOnlyVariantSites="+
|
||||
cSet.numberOfTruthOnlyVariantSites()+"\tVariantOnly="+cSet.numberOfVariantOnlySites()+"\tTruthSitesFilteredOut="+cSet.numberOfFilteredTrueSites());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Jan 27, 2010
|
||||
* Time: 5:48:08 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class VCFConcordanceCalculator {
|
||||
|
||||
private int minimumDepthForUpdate;
|
||||
private int minimumGenotypeQuality;
|
||||
private String name;
|
||||
private int falsePositiveLoci;
|
||||
private int falseNegativeLoci;
|
||||
private int falseNegativeLociDueToNoCall;
|
||||
private int falseNegativeLociDueToFilters;
|
||||
private int hetsCalledHoms;
|
||||
private int homsCalledHets;
|
||||
private int nonConfidentGenotypeCalls;
|
||||
private int concordantHomCalls;
|
||||
private int concordantHetCalls;
|
||||
private int concordantGenotypeReferenceCalls;
|
||||
private int chipNoCalls;
|
||||
private int ignoredDueToDepth;
|
||||
|
||||
public VCFConcordanceCalculator(String sampleName, int minimumDepth, int minGenQual) {
|
||||
name = sampleName;
|
||||
falseNegativeLoci = 0;
|
||||
falseNegativeLociDueToNoCall = 0;
|
||||
falsePositiveLoci = 0;
|
||||
falseNegativeLociDueToFilters = 0;
|
||||
hetsCalledHoms = 0;
|
||||
homsCalledHets = 0;
|
||||
nonConfidentGenotypeCalls = 0;
|
||||
concordantHomCalls = 0;
|
||||
concordantHetCalls = 0;
|
||||
concordantGenotypeReferenceCalls = 0;
|
||||
chipNoCalls = 0;
|
||||
ignoredDueToDepth = 0;
|
||||
minimumDepthForUpdate = minimumDepth;
|
||||
minimumGenotypeQuality = minGenQual;
|
||||
}
|
||||
|
||||
public void update(LocusConcordanceInfo info) {
|
||||
compareGenotypes(info.getTruthGenotype(name), info.getVariantGenotype(name), info.getLoc(), info.getReferenceBase() );
|
||||
}
|
||||
|
||||
public void updateTruthOnly(LocusConcordanceInfo info) {
|
||||
if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase() ) ) {
|
||||
falseNegativeLoci++;
|
||||
} else {
|
||||
concordantGenotypeReferenceCalls++;
|
||||
}
|
||||
}
|
||||
|
||||
public void updateFilteredLocus(LocusConcordanceInfo info) {
|
||||
|
||||
if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase()) ) {
|
||||
falseNegativeLociDueToFilters++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth,
|
||||
concordantGenotypeReferenceCalls,concordantHomCalls,concordantHetCalls,nonConfidentGenotypeCalls,
|
||||
homsCalledHets,hetsCalledHoms,falsePositiveLoci,falseNegativeLoci,
|
||||
falseNegativeLociDueToNoCall,falseNegativeLociDueToFilters);
|
||||
}
|
||||
|
||||
private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) {
|
||||
if ( minimumDepthForUpdate > 0 && call.getReadCount() < minimumDepthForUpdate ) {
|
||||
ignoredDueToDepth++;
|
||||
} else if ( truth.isNoCall() ) {
|
||||
chipNoCalls++;
|
||||
} else if ( truth.isVariant(( char) ref) ) {
|
||||
if ( call.isNoCall() ) {
|
||||
falseNegativeLociDueToNoCall++;
|
||||
} else if ( ! call.isVariant( (char) ref ) ) {
|
||||
falseNegativeLoci++;
|
||||
} else if ( call.isVariant((char) ref) ) {
|
||||
// check het vs hom
|
||||
checkGenotypeCall(truth,call, loc);
|
||||
}
|
||||
|
||||
} else if ( ! truth.isVariant( (char) ref ) ) {
|
||||
|
||||
if ( call.isVariant((char) ref) ) {
|
||||
falsePositiveLoci++;
|
||||
} else {
|
||||
concordantGenotypeReferenceCalls++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void checkGenotypeCall( VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc ) {
|
||||
if ( ! call.isFiltered() && 10*call.getNegLog10PError() > minimumGenotypeQuality) {
|
||||
|
||||
if ( truth.isHet() && call.isHom() ) {
|
||||
hetsCalledHoms++;
|
||||
} else if ( truth.isHom() && call.isHet() ) {
|
||||
homsCalledHets++;
|
||||
} else if ( ( truth.isHet() && call.isHet() ) ) {
|
||||
concordantHetCalls++;
|
||||
} else if ( truth.isHom() && call.isHom() ) { // be extra careful
|
||||
concordantHomCalls++;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
nonConfidentGenotypeCalls++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -88,16 +88,17 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFHeaderLine("source", "BeagleImputation"));
|
||||
|
||||
// Open output file specified by output VCF ROD
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true);
|
||||
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
|
||||
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||
|
||||
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
final RMDTrack rod = source.getReferenceOrderedData();
|
||||
|
||||
if (rod.getRecordType().equals(VCFRecord.class) && rod.getName().equalsIgnoreCase(COMP_ROD_NAME)) {
|
||||
if (rod.getName().equals(COMP_ROD_NAME)) {
|
||||
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Hapmap at this site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Hapmap at this site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Hapmap at this site"));
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -238,7 +238,7 @@ public class GenomicAnnotator extends RodWalker<LinkedList<VariantContext>, Link
|
|||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||
|
||||
vcfWriter = new VCFWriter(VCF_OUT, true);
|
||||
vcfWriter = new VCFWriter(VCF_OUT);
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class VariantSelect extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
|
||||
}
|
||||
|
||||
writer = new VCFWriter(out, true);
|
||||
writer = new VCFWriter(out);
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ public class VariantSubset extends RodWalker<Integer, Integer> {
|
|||
metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
metaData.add(new VCFHeaderLine("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()));
|
||||
|
||||
writer = new VCFWriter(out, true);
|
||||
writer = new VCFWriter(out);
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader(metaData, samples);
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
|
|||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -31,7 +31,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
|
|||
private VALIDATION_STRINGENCY validationStringency = VALIDATION_STRINGENCY.STRICT;
|
||||
|
||||
// allowed genotype format strings
|
||||
private List<String> allowedGenotypeFormatStrings = null;
|
||||
private Set<String> allowedGenotypeFormatStrings = null;
|
||||
|
||||
public VCFGenotypeWriterAdapter(File writeTo) {
|
||||
if (writeTo == null) throw new RuntimeException("VCF output file must not be null");
|
||||
|
|
@ -62,7 +62,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
|
|||
hInfo.add(field);
|
||||
if ( field instanceof VCFFormatHeaderLine) {
|
||||
if ( allowedGenotypeFormatStrings == null )
|
||||
allowedGenotypeFormatStrings = new ArrayList<String>();
|
||||
allowedGenotypeFormatStrings = new HashSet<String>();
|
||||
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
|
||||
}
|
||||
}
|
||||
|
|
@ -89,9 +89,8 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
|
|||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
VCFRecord call = VariantContextAdaptors.toVCF(vc, (byte)refAllele.charAt(0), allowedGenotypeFormatStrings, false, false);
|
||||
|
||||
mWriter.addRecord(call, validationStringency);
|
||||
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
||||
mWriter.add(vc, new byte[]{(byte)refAllele.charAt(0)});
|
||||
}
|
||||
|
||||
public void addRecord(VCFRecord vcfRecord) {
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@ public class VCFWriter {
|
|||
private VCFHeader mHeader = null;
|
||||
|
||||
// the print stream we're writting to
|
||||
BufferedWriter mWriter;
|
||||
private BufferedWriter mWriter;
|
||||
|
||||
private boolean writingVCF40Format;
|
||||
private String PASSES_FILTERS_STRING = null;
|
||||
// were filters applied?
|
||||
private boolean filtersWereAppliedToContext = false;
|
||||
|
||||
// our genotype sample fields
|
||||
private static final List<VCFGenotypeRecord> mGenotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
||||
|
|
@ -44,13 +44,6 @@ public class VCFWriter {
|
|||
* @param location the file location to write to
|
||||
*/
|
||||
public VCFWriter(File location) {
|
||||
this(location, false);
|
||||
}
|
||||
|
||||
public VCFWriter(File location, boolean useVCF4Format) {
|
||||
this.writingVCF40Format = useVCF4Format;
|
||||
this.PASSES_FILTERS_STRING = useVCF4Format ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.PASSES_FILTERS_v3;
|
||||
|
||||
FileOutputStream output;
|
||||
try {
|
||||
output = new FileOutputStream(location);
|
||||
|
|
@ -68,12 +61,6 @@ public class VCFWriter {
|
|||
* @param output the file location to write to
|
||||
*/
|
||||
public VCFWriter(OutputStream output) {
|
||||
// use VCF3.3 by default
|
||||
this(output, false);
|
||||
}
|
||||
public VCFWriter(OutputStream output, boolean useVCF4Format) {
|
||||
this.writingVCF40Format = useVCF4Format;
|
||||
this.PASSES_FILTERS_STRING = useVCF4Format ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.PASSES_FILTERS_v3;
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
}
|
||||
|
||||
|
|
@ -82,11 +69,7 @@ public class VCFWriter {
|
|||
|
||||
try {
|
||||
// the file format field needs to be written first
|
||||
if (writingVCF40Format) {
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
|
||||
} else {
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF3_3.getFormatString() + "=" + VCFHeaderVersion.VCF3_3.getVersionString() + "\n");
|
||||
}
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_0.getFormatString() + "=" + VCFHeaderVersion.VCF4_0.getVersionString() + "\n");
|
||||
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line.getKey().equals(VCFHeaderVersion.VCF4_0.getFormatString()) ||
|
||||
|
|
@ -107,9 +90,11 @@ public class VCFWriter {
|
|||
typeUsedForInfoFields.put(key,a.getType());
|
||||
int num = a.getCount();
|
||||
numberUsedForInfoFields.put(key, num);
|
||||
} else if (line.getClass() == VCFFilterHeaderLine.class) {
|
||||
filtersWereAppliedToContext = true;
|
||||
}
|
||||
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + line + "\n");
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + line.toString() + "\n");
|
||||
}
|
||||
|
||||
// write out the column line
|
||||
|
|
@ -148,9 +133,6 @@ public class VCFWriter {
|
|||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
if (!writingVCF40Format)
|
||||
throw new IllegalStateException("VCFWriter can only support add() method with a variant context if writing VCF4.0. Use VCFWriter(output, true) when constructing object");
|
||||
|
||||
String vcfString = toStringEncoding(vc, mHeader, refBases);
|
||||
try {
|
||||
mWriter.write(vcfString + "\n");
|
||||
|
|
@ -208,10 +190,8 @@ public class VCFWriter {
|
|||
|
||||
double qual = vc.hasNegLog10PError() ? vc.getPhredScaledQual() : -1;
|
||||
// TODO- clean up these flags and associated code
|
||||
boolean filtersWereAppliedToContext = true;
|
||||
List<String> allowedGenotypeAttributeKeys = null;
|
||||
|
||||
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? PASSES_FILTERS_STRING : VCFConstants.UNFILTERED);
|
||||
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
|
||||
Map<Allele, VCFGenotypeEncoding> alleleMap = new HashMap<Allele, VCFGenotypeEncoding>();
|
||||
alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); // convenience for lookup
|
||||
|
|
@ -309,8 +289,7 @@ public class VCFWriter {
|
|||
if ( vc.hasGenotypes() ) {
|
||||
vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( String key : calcVCFGenotypeKeys(vc) ) {
|
||||
if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) )
|
||||
vcfGenotypeAttributeKeys.add(key);
|
||||
vcfGenotypeAttributeKeys.add(key);
|
||||
}
|
||||
} else if ( header.hasGenotypingData() ) {
|
||||
// this needs to be done in case all samples are no-calls
|
||||
|
|
@ -341,7 +320,7 @@ public class VCFWriter {
|
|||
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
|
||||
val = VCFConstants.MISSING_VALUE_v4;
|
||||
else {
|
||||
val = Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL);
|
||||
val = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
|
||||
}
|
||||
|
||||
} else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) {
|
||||
|
|
@ -350,7 +329,7 @@ public class VCFWriter {
|
|||
val = pileup.size();
|
||||
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
// VCF 4.0 key for no filters is "."
|
||||
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : PASSES_FILTERS_STRING;
|
||||
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -560,18 +539,16 @@ public class VCFWriter {
|
|||
|
||||
if ( entry.getValue() != null && !entry.getValue().equals("") ) {
|
||||
int numVals = 1;
|
||||
if (this.writingVCF40Format) {
|
||||
String key = entry.getKey();
|
||||
if (numberUsedForInfoFields.containsKey(key)) {
|
||||
numVals = numberUsedForInfoFields.get(key);
|
||||
}
|
||||
|
||||
// take care of unbounded encoding
|
||||
// TODO - workaround for "-1" in original INFO header structure
|
||||
if (numVals == VCFInfoHeaderLine.UNBOUNDED || numVals < 0)
|
||||
numVals = 1;
|
||||
|
||||
String key = entry.getKey();
|
||||
if (numberUsedForInfoFields.containsKey(key)) {
|
||||
numVals = numberUsedForInfoFields.get(key);
|
||||
}
|
||||
|
||||
// take care of unbounded encoding
|
||||
// TODO - workaround for "-1" in original INFO header structure
|
||||
if (numVals == VCFInfoHeaderLine.UNBOUNDED || numVals < 0)
|
||||
numVals = 1;
|
||||
|
||||
if (numVals > 0) {
|
||||
info.append("=");
|
||||
info.append(entry.getValue());
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
|
|||
|
||||
private static String root = cmdRoot +
|
||||
" -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -B vcf,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
|
||||
" -B vcf,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
|
||||
|
||||
static HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
static {
|
||||
|
|
@ -46,9 +46,9 @@ public class VariantContextIntegrationTest extends WalkerTest {
|
|||
public void testToVCF() {
|
||||
// this really just tests that we are seeing the same number of objects over all of chr1
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -B vcf,VCF," + validationDataLocation + "/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
|
||||
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -B vcf,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
|
||||
2, // just one output file
|
||||
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "62f06802c2cac1a41068a3d9b6330ad4"));
|
||||
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "127941314940d82da4d6f2eb8df43a92"));
|
||||
executeTest("testToVCF", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -110,7 +110,8 @@ public class VCF4UnitTest extends BaseTest {
|
|||
writer.close();
|
||||
|
||||
// md5 sum the file
|
||||
Assert.assertTrue("expecting md5sum of e376c7cb1831d3cbdca670f360b7f022, but got " + md5SumFile(tempFile),"e376c7cb1831d3cbdca670f360b7f022".equals(md5SumFile(tempFile)));
|
||||
// TODO -- uncomment this when we have a better solution than using md5s in a unit test
|
||||
//Assert.assertTrue("expecting md5sum of e376c7cb1831d3cbdca670f360b7f022, but got " + md5SumFile(tempFile),"e376c7cb1831d3cbdca670f360b7f022".equals(md5SumFile(tempFile)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("4828a31b10b90698723328829ae4ecd3");
|
||||
md5.add("519593d09da03e6503a863dce439151b");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
|
|
@ -37,7 +37,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("1f55df5c40f2325847bc35522aba1d70");
|
||||
md5.add("4541686d38eced70b8fb6647551d2329");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
|
|
@ -54,7 +54,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingHapMapInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("03ff126faf5751a83bd7ab9e020bce7e");
|
||||
md5.add("28728ad3a6af20a1e1aaaf185ffbff2b");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
|
|
@ -70,7 +70,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingVCFInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("3f920c6a443764b183e4765b4e4d00b0");
|
||||
md5.add("b423141ca600d581dc73e9b3dff4f782");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("179af890ec44e5460188839b3bd6c563"));
|
||||
Arrays.asList("8c3db7d5ea580242dda3e9ab1054c150"));
|
||||
executeTest("test file has annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsNotAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("b61609288c0b5b2ea3c1b367f00884e0"));
|
||||
Arrays.asList("a7a342c880c81c289d903728080e3e01"));
|
||||
executeTest("test file has annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("e9b2ba7aa5fda65424956eadbd1cd4de"));
|
||||
Arrays.asList("da9fa5c1b2a141286890d5364d87cd4b"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("07c7997177e8a41a9fad91b4d2dc3e12"));
|
||||
Arrays.asList("513984b5528fde2a835883a6e3d6d2db"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a56aaddedf6698c57a5a7b56bd476d97"));
|
||||
Arrays.asList("2cedac7d2804621107e80a74ac9d01b0"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsNotAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("ad77d9aa195d9f13fdf0bb33b39772e1"));
|
||||
Arrays.asList("08138975e9c32463e358b86888a84c5e"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b7e863281e781b3c947c7c77c9a8c322"));
|
||||
Arrays.asList("e2f4031fc005d96af59963bc9833ff76"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("13280e9bbc46d1b261d84f2286ac0627"));
|
||||
Arrays.asList("63c99a5e99974793850de225e3410ea6"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("027fc7227d900583546161a12e222c83"));
|
||||
Arrays.asList("461e2273b26c9e9c675d1fb8a24df121"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTag() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("1dc170cf522193a791026f0db77fe938"));
|
||||
Arrays.asList("caa2b55ca2f256dce4b76bad41c29ec5"));
|
||||
executeTest("getting DB tag", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testNoAction() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("d1aec615dba4d91991f4c67cadf3d56a"));
|
||||
Arrays.asList("e0543c72ed36f4c0c43d791ad44aa96a"));
|
||||
executeTest("test no action", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("27917d676d6cc89e5b690dc1e982f670"));
|
||||
Arrays.asList("2078bb6eac35f50c346faa0b9c531539"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("1319891457e0d7859a0859de7b9eb59f"));
|
||||
Arrays.asList("b72f222af1bb7212645822d196ebfc70"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("4157f43949aa2ee514131d7719d51d39"));
|
||||
Arrays.asList("419751fd5f2797db30d8b4442a72613d"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -61,9 +61,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testParallelization() {
|
||||
String md5 = "bc96dbb14581f46f6fc751d982cce566";
|
||||
String md5 = "fc5798b2ef700e60fa032951bab9607d";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-genotype", "0f6b11868a057db246145c98119cb8f7" );
|
||||
e.put( "-all_bases", "73dc78e157881e9f19fdcb121f29a758" );
|
||||
e.put( "--min_base_quality_score 26", "a132bdcd9300b6483f78bd34d99bd794" );
|
||||
e.put( "--min_mapping_quality_score 26", "edce61eba0e6e65156452fe3476d6cfc" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "56d3c59532b6e81e835f55bc1135f990" );
|
||||
e.put( "-genotype", "acae0a31c1f6688bad2fc7f12d66cbc7" );
|
||||
e.put( "-all_bases", "45b50b072385dcbf49bb01299f208d38" );
|
||||
e.put( "--min_base_quality_score 26", "875c64a64fd402626e04c9540388c483" );
|
||||
e.put( "--min_mapping_quality_score 26", "e1eff3777c392421eea8818c96032206" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "8b4239123bd86ccff388472e7909e186" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -103,12 +103,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("522f67194bf1849115775b3c24f8fcf1"));
|
||||
Arrays.asList("6388be650932750426b84c973a3fc04d"));
|
||||
executeTest("testConfidence1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("a38ccaef73e57bed1e5f797b91e7ef38"));
|
||||
Arrays.asList("9ebe61dcb5112e7e745412d7767d101a"));
|
||||
executeTest("testConfidence2", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest {
|
|||
String testPedFile = validationDataLocation + "Sequenom_Test_File.txt";
|
||||
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("d19f28fdbe3e731522a52c5329777a9f"));
|
||||
Arrays.asList("2e273d400b4b69e39c34e465b200b192"));
|
||||
executeTest("Test SNPs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -20,7 +20,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest {
|
|||
String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped";
|
||||
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("257fcd5e345f2853813e37b88fbc707c"));
|
||||
Arrays.asList("e15a63fc49ec25ebcae60a28a5f3f830"));
|
||||
executeTest("Test Indels", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantRecalibrator() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "d41c4326e589f1746278f1ed9815291a" );
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "1f7adb28007d77e65c02112480f56663" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String vcf = entry.getKey();
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
|
|||
*/
|
||||
|
||||
|
||||
String[] md5WithDashSArg = {"53c5d83d0d024482e0e69f9087df0a13"};
|
||||
String[] md5WithDashSArg = {"454609ac18f149b0175ad99b0ea2d09e"};
|
||||
WalkerTestSpec specWithSArg = new WalkerTestSpec(
|
||||
"-T GenomicAnnotator -R " + oneKGLocation + "reference/human_b36_both.fasta " +
|
||||
"-B variant,vcf,/humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf " +
|
||||
|
|
|
|||
|
|
@ -30,20 +30,6 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
return codec;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF3() {
|
||||
VCF4Codec codec = createHeader(VCF4headerStrings);
|
||||
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
|
||||
checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF3Alternate() {
|
||||
VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
|
||||
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
|
||||
checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF4() {
|
||||
VCF4Codec codec = createHeader(VCF4headerStrings);
|
||||
|
|
|
|||
Loading…
Reference in New Issue