1) When quals/GQs are really integers (x.00), strip off the trailing ".00" so they are written as integers.

2) Keep track of whether vcf records are unfiltered vs. pass filters in the variant context so we can regenerate the records on output.
3) No more "ID" hard-coded all over the code to set the VariantContext ID.  Use a static variable instead.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3840 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-07-20 18:01:45 +00:00
parent 0db7fab1a9
commit c6ad26e04f
26 changed files with 150 additions and 127 deletions

View File

@ -81,4 +81,5 @@ public final class VCFConstants {
public static final double MAX_GENOTYPE_QUAL = 99.0;
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
public static final String DOUBLE_PRECISION_INT_SUFFIX = ".00";
}

View File

@ -17,11 +17,14 @@ public class Genotype {
protected InferredGeneticContext commonInfo;
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
protected List<Allele> alleles = null; // new ArrayList<Allele>();
private boolean genotypesArePhased = false;
private boolean filtersWereAppliedToContext;
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean genotypesArePhased) {
this.alleles = Collections.unmodifiableList(alleles);
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
filtersWereAppliedToContext = filters != null;
this.genotypesArePhased = genotypesArePhased;
validate();
}
@ -191,6 +194,7 @@ public class Genotype {
// Filter state: these three delegate to the shared commonInfo object.
public Set<String> getFilters() { return commonInfo.getFilters(); }
public boolean isFiltered() { return commonInfo.isFiltered(); }
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
/** @return the flag recorded by the constructor: true when it was given a non-null filter set */
public boolean filtersWereApplied() { return filtersWereAppliedToContext; }
// Quality accessors: all delegate to the shared commonInfo object.
public boolean hasNegLog10PError() { return commonInfo.hasNegLog10PError(); }
public double getNegLog10PError() { return commonInfo.getNegLog10PError(); }
public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); }

View File

@ -164,6 +164,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
protected InferredGeneticContext commonInfo = null;
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
public final static String REFERENCE_BASE_FOR_INDEL_KEY = "REFERENCE_BASE_FOR_INDEL";
public final static String ID_KEY = "ID";
/** The location of this VariantContext */
private GenomeLoc loc;
@ -188,22 +189,32 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// set to the alt allele when biallelic, otherwise == null
private Allele ALT = null;
// were filters applied?
private boolean filtersWereAppliedToContext;
// ---------------------------------------------------------------------------------------------------------
//
// constructors
//
// ---------------------------------------------------------------------------------------------------------
// todo move all of attribute object attributes into Map<> and make special filter value for printing out values when
// emitting VC -> VCF or whatever
/**
* the complete constructor. Makes a complete VariantContext from its arguments
*
* @param name name
* @param loc location
* @param alleles alleles
* @param genotypes genotypes map
* @param negLog10PError qual
* @param filters filters: use null for unfiltered and empty set for passes filters
* @param attributes attributes
*/
public VariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
if ( loc == null ) { throw new StingException("GenomeLoc cannot be null"); }
this.loc = loc;
this.commonInfo = new InferredGeneticContext(name, negLog10PError, filters, attributes);
filtersWereAppliedToContext = filters != null;
if ( alleles == null ) { throw new StingException("Alleles cannot be null"); }
// we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles
@ -228,13 +239,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
/**
* Create a new VariantContext
*
* @param name
* @param loc
* @param alleles
* @param genotypes
* @param negLog10PError
* @param filters
* @param attributes
* @param name name
* @param loc location
* @param alleles alleles
* @param genotypes genotypes set
* @param negLog10PError qual
* @param filters filters: use null for unfiltered and empty set for passes filters
* @param attributes attributes
*/
public VariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(name, loc, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes);
@ -242,9 +253,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
/**
* Create a new variant context without genotypes and no Perror, no filters, and no attributes
* @param name
* @param loc
* @param alleles
* @param name name
* @param loc location
* @param alleles alleles
*/
public VariantContext(String name, GenomeLoc loc, Collection<Allele> alleles) {
this(name, loc, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
@ -252,9 +263,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
/**
* Create a new variant context with genotypes but with no Perror, no filters, and no attributes
* @param name
* @param loc
* @param alleles
* @param name name
* @param loc location
* @param alleles alleles
* @param genotypes genotypes
*/
public VariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
this(name, loc, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
@ -281,8 +293,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* genotype and alleles in genotype. This is the right way to test if a single genotype is actually
* variant or not.
*
* @param genotype
* @return
* @param genotype genotype
* @return vc subcontext
*/
public VariantContext subContextFromGenotypes(Genotype genotype) {
return subContextFromGenotypes(Arrays.asList(genotype));
@ -294,17 +306,17 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
* variant or not.
*
* @param genotypes
* @return
* @param genotypes genotypes
* @return vc subcontext
*/
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
    // Only forward the filter set when filters were actually applied to this context.
    // The constructor records "filters != null" as the applied/unfiltered flag, so passing
    // getFilters() unconditionally could turn an unfiltered context into one that looks
    // filtered. This mirrors the pattern used by the VariantContextUtils.modify* helpers.
    return new VariantContext(getName(), getLocation(), allelesOfGenotypes(genotypes), genotypes, getNegLog10PError(),
            filtersWereApplied() ? getFilters() : null, getAttributes());
}
/**
* helper routnine for subcontext
* @param genotypes
* @return
* helper routine for subcontext
* @param genotypes genotypes
* @return allele set
*/
private Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
Set<Allele> alleles = new HashSet<Allele>();
@ -478,6 +490,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// Filter state: these three delegate to the shared commonInfo object.
public Set<String> getFilters() { return commonInfo.getFilters(); }
public boolean isFiltered() { return commonInfo.isFiltered(); }
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
/** @return the flag recorded by the constructor: true when it was given a non-null filter set */
public boolean filtersWereApplied() { return filtersWereAppliedToContext; }
// Quality accessors: all delegate to the shared commonInfo object.
public boolean hasNegLog10PError() { return commonInfo.hasNegLog10PError(); }
public double getNegLog10PError() { return commonInfo.getNegLog10PError(); }
public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); }
@ -724,7 +737,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
/**
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
*
* @return
* @return chromosome count
*/
public int getChromosomeCount() {
int n = 0;
@ -739,8 +752,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
/**
* Returns the number of chromosomes carrying allele A in the genotypes
*
* @param a
* @return
* @param a allele
* @return chromosome count
*/
public int getChromosomeCount(Allele a) {
int n = 0;

View File

@ -242,8 +242,8 @@ public class VariantContextUtils {
filters.addAll(vc.getFilters());
if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) )
depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY));
if ( rsID == null && vc.hasAttribute("ID") )
rsID = vc.getAttributeAsString("ID");
if ( rsID == null && vc.hasAttribute(VariantContext.ID_KEY) )
rsID = vc.getAttributeAsString(VariantContext.ID_KEY);
for ( Map.Entry<String, Object> p : vc.getAttributes().entrySet() ) {
if ( ! attributes.containsKey(p.getKey()) || attributes.get(p.getKey()).equals(".") ) { // no value
@ -277,7 +277,7 @@ public class VariantContextUtils {
if ( depth > 0 )
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
if ( rsID != null )
attributes.put("ID", rsID);
attributes.put(VariantContext.ID_KEY, rsID);
VariantContext merged = new VariantContext(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
@ -431,11 +431,11 @@ public class VariantContextUtils {
}
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes());
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
}
public static VariantContext modifyLocation(VariantContext vc, GenomeLoc loc) {
return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes());
return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
}
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
@ -443,15 +443,15 @@ public class VariantContextUtils {
}
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes);
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes);
}
public static Genotype modifyName(Genotype g, String name) {
return new Genotype(name, g.getAlleles(), g.getNegLog10PError(), g.getFilters(), g.getAttributes(), g.genotypesArePhased());
return new Genotype(name, g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.genotypesArePhased());
}
public static Genotype modifyAttributes(Genotype g, Map<String, Object> attributes) {
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(), attributes, g.genotypesArePhased());
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.genotypesArePhased());
}
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {

View File

@ -108,7 +108,7 @@ public class VariantContextAdaptors {
}
Map<String, String> attributes = new HashMap<String, String>();
attributes.put("ID", dbsnp.getRsID());
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
Collection<Genotype> genotypes = null;
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd()), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
return vc;
@ -133,7 +133,7 @@ public class VariantContextAdaptors {
Set<String> filters = vcf.isFiltered() ? new HashSet<String>(Arrays.asList(vcf.getFilteringCodes())) : null;
Map<String, String> attributes = new HashMap<String, String>(vcf.getInfoValues());
attributes.put("ID", vcf.getID());
attributes.put(VariantContext.ID_KEY, vcf.getID());
// add all of the alt alleles
List<Allele> alleles = new ArrayList<Allele>();

View File

@ -260,7 +260,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
// validate the fields
validateFields(attributes.keySet(),new ArrayList(infoFields.keySet()));
attributes.put("ID", id);
attributes.put(VariantContext.ID_KEY, id);
return attributes;
}
@ -285,15 +285,9 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
* @return return a double
*/
private Double parseQual(String qualString) {
if (qualString.equals(VCFConstants.MISSING_VALUE_v4))
if ( qualString.equals(VCFConstants.MISSING_VALUE_v4) || qualString.equals(VCFConstants.MISSING_QUALITY_v3) )
return VariantContext.NO_NEG_LOG_10PERROR;
else {
double q = Double.valueOf(qualString);
if ( q == -1 )
return VariantContext.NO_NEG_LOG_10PERROR;
else
return Double.valueOf(qualString) / 10;
}
return Double.valueOf(qualString) / 10;
}
/**
@ -357,36 +351,36 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
* @return a set of the filters applied
*/
private Set<String> parseFilters(String filterString) {
Set<String> fFields;
// a PASS is simple (no filters)
String passString = VCFConstants.PASSES_FILTERS_v3;
if (this.version == VCFHeaderVersion.VCF4_0)
passString = VCFConstants.PASSES_FILTERS_v4;
// null for unfiltered
if ( filterString.equals(VCFConstants.UNFILTERED) )
return null;
if ( filterString.equals(passString) ) {
return null;
}
if ( filterString.equals(VCFConstants.UNFILTERED)) {
return null;
}
// else do we have the filter string cached?
else if (filterHash.containsKey(filterString)) {
fFields = filterHash.get(filterString);
// empty set for passes filters
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
if ( this.version == VCFHeaderVersion.VCF4_0 ) {
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
return fFields;
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
throw new StingException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4.0");
} else if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) ) {
return fFields;
}
// do we have the filter string cached?
if ( filterHash.containsKey(filterString) )
return filterHash.get(filterString);
// otherwise we have to parse and cache the value
else {
LinkedHashSet<String> s = new LinkedHashSet<String>(1);
if ( filterString.indexOf(";") == -1 ) {
s.add(filterString);
} else {
s.addAll(Utils.split(filterString, ";"));
}
filterHash.put(filterString,s);
fFields = s;
}
if ( filterString.indexOf(";") == -1 )
fFields.add(filterString);
else
fFields.addAll(Utils.split(filterString, ";"));
validateFields(fFields,filterFields);
filterHash.put(filterString, fFields);
validateFields(fFields, filterFields);
return fFields;
}

View File

@ -72,7 +72,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
for ( VariantContext vc : contexts ) {
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
if ( dbsnp != null )
attrs.put("ID", dbsnp.getRsID());
attrs.put(VariantContext.ID_KEY, dbsnp.getRsID());
vc = VariantContextUtils.modifyAttributes(vc, attrs);
// set the appropriate sample name if necessary

View File

@ -191,8 +191,8 @@ public class VariantAnnotatorEngine {
DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
infoAnnotations.put(VCFConstants.DBSNP_KEY, dbsnp == null ? false : true);
// annotate dbsnp id if available and not already there
if ( dbsnp != null && (!vc.hasAttribute("ID") || vc.getAttribute("ID").equals(VCFConstants.EMPTY_ID_FIELD)) )
infoAnnotations.put("ID", dbsnp.getRsID());
if ( dbsnp != null && (!vc.hasAttribute(VariantContext.ID_KEY) || vc.getAttribute(VariantContext.ID_KEY).equals(VCFConstants.EMPTY_ID_FIELD)) )
infoAnnotations.put(VariantContext.ID_KEY, dbsnp.getRsID());
} else {
List<Object> dbRod = tracker.getReferenceMetaData(dbSet.getKey());
infoAnnotations.put(dbSet.getValue(), dbRod.size() == 0 ? false : true);
@ -252,7 +252,7 @@ public class VariantAnnotatorEngine {
//Create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList
Collection<VariantContext> returnValue = new LinkedList<VariantContext>();
for(Map<String, Object> infoAnnotationOutput : infoAnnotationOutputsList) {
returnValue.add( new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.getFilters(), infoAnnotationOutput) );
returnValue.add( new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) );
}
return returnValue;

View File

@ -372,7 +372,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
DbSNPFeature dbsnp = getDbSNP(tracker);
if ( dbsnp != null )
attributes.put("ID", dbsnp.getRsID());
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
if ( !UAC.NO_SLOD ) {
// the overall lod

View File

@ -321,9 +321,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
readsToClean.clear();
readsNotToClean.clear();
do {
currentInterval = intervals.hasNext() ? intervals.next() : null;
} while ( currentInterval != null && (readLoc == null || currentInterval.isBefore(readLoc)) );
try {
do {
currentInterval = intervals.hasNext() ? intervals.next() : null;
} while ( currentInterval != null && (readLoc == null || currentInterval.isBefore(readLoc)) );
} catch (StingException e) {
throw new StingException(e.getMessage() + " *** Are you sure that your interval file is sorted? If not, you must use the --targetIntervalsAreNotSorted argument. ***");
}
// call back into map now that the state has been updated
map(ref, read, metaDataTracker);

View File

@ -209,7 +209,7 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
// set the id if it's a plink rod
if ( rod instanceof PlinkRod )
infoMap.put("ID", ((PlinkRod)rod).getVariantName());
infoMap.put(VariantContext.ID_KEY, ((PlinkRod)rod).getVariantName());
vContext = VariantContextUtils.modifyAttributes(vContext, infoMap);

View File

@ -353,9 +353,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
return new HashSet<String>(toExclude);
}
private final static String ID = "ID";
private boolean excludeComp(VariantContext vc) {
String id = vc != null && vc.hasAttribute(ID) ? vc.getAttributeAsString(ID) : null;
String id = vc != null && vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : null;
boolean ex = rsIDsToExclude != null && id != null && rsIDsToExclude.contains(id);
//System.out.printf("Testing id %s ex=%b against %s%n", id, ex, vc);
return ex;

View File

@ -202,7 +202,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
public VariantContext toVariantContext() {
newAttributes.putAll(trio.getAttributes());
return new VariantContext(trio.getName(),trio.getLocation(),trio.getAlleles(),trio.getGenotypes(),trio.getNegLog10PError(),trio.getFilters(),newAttributes);
return new VariantContext(trio.getName(),trio.getLocation(),trio.getAlleles(),trio.getGenotypes(),trio.getNegLog10PError(),trio.filtersWereApplied()?trio.getFilters():null,newAttributes);
}
public boolean siteIsFiltered() {

View File

@ -284,7 +284,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
}
VariantContext filteredVC = new VariantContext("outputvcf", vc_input.getLocation(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.getFilters(), vc_input.getAttributes());
VariantContext filteredVC = new VariantContext("outputvcf", vc_input.getLocation(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes());
Set<Allele> altAlleles = filteredVC.getAlternateAlleles();
StringBuffer altAlleleCountString = new StringBuffer();

View File

@ -127,7 +127,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
out.printf("%s\t%s\t\t%d\t%f\t%d\t%c\t%s\t%s\t%d\t%d%n",
context.getLocation(),
vc.hasAttribute("ID") ? vc.getAttribute("ID") : "?",
vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttribute(VariantContext.ID_KEY) : "?",
coverage,
((float) coverage)/((float) reads.size()),
goodIterations,

View File

@ -84,7 +84,7 @@ public class AnnotationDataManager {
treeSet.add(datum);
}
final boolean isNovelVariant = infoField.containsKey("ID");
final boolean isNovelVariant = infoField.containsKey(VariantContext.ID_KEY);
// Decide if the variant is a transition or transversion
if ( vc.isSNP() ) {

View File

@ -153,7 +153,7 @@ public class VCFWriter {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// ID
String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : VCFConstants.EMPTY_ID_FIELD;
String ID = vc.hasAttribute(VariantContext.ID_KEY) ? vc.getAttributeAsString(VariantContext.ID_KEY) : VCFConstants.EMPTY_ID_FIELD;
mWriter.write(ID);
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -186,11 +186,11 @@ public class VCFWriter {
if ( !vc.hasNegLog10PError() )
mWriter.write(VCFConstants.MISSING_VALUE_v4);
else
mWriter.write(String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, vc.getPhredScaledQual()));
mWriter.write(getQualValue(vc.getPhredScaledQual()));
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// FILTER
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
mWriter.write(filters);
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -198,7 +198,7 @@ public class VCFWriter {
Map<String, String> infoFields = new TreeMap<String, String>();
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
String key = field.getKey();
if ( key.equals("ID") || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) )
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) )
continue;
String outputValue = formatVCFField(field.getValue());
@ -235,6 +235,13 @@ public class VCFWriter {
}
/**
 * Formats a quality value for output.
 *
 * The value is rendered with VCFConstants.DOUBLE_PRECISION_FORMAT_STRING; if the rendered text ends
 * with VCFConstants.DOUBLE_PRECISION_INT_SUFFIX, that suffix is removed so that integral values are
 * written without a fractional part.
 *
 * @param qual the quality value to format
 * @return the formatted value, with a trailing integer suffix stripped when present
 */
private String getQualValue(double qual) {
    // Use a fixed locale: with a default locale whose decimal separator is ',' the two-argument
    // String.format would render "30,00", which never matches the ".00" suffix test below.
    String s = String.format(java.util.Locale.ROOT, VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual);
    if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) ) {
        s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length());
    }
    return s;
}
private String makeAlleleString(Allele allele, boolean isIndel, byte ref) {
String s = new String(allele.getBases());
if ( isIndel || s.length() == 0 ) // in case the context is monomorphic at an indel site
@ -322,14 +329,14 @@ public class VCFWriter {
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
val = VCFConstants.MISSING_VALUE_v4;
else {
val = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
}
} else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) {
ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY);
if ( pileup != null )
val = pileup.size();
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
}
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);

View File

@ -48,7 +48,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -B vcf,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
2, // just one output file
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "127941314940d82da4d6f2eb8df43a92"));
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "f4db5f7346792b1155693722bc190f63"));
executeTest("testToVCF", spec);
}

View File

@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("519593d09da03e6503a863dce439151b");
md5.add("b4f98bee580508637c88c421064936fc");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
@ -37,7 +37,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("4541686d38eced70b8fb6647551d2329");
md5.add("0f310612c8609cba3dcf9cc97b2c1195");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
@ -70,7 +70,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("919eb499bfcc980a14825a0265e575e3");
md5.add("19371e6cfea5f29fb75d5a2be7fccd34");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +

View File

@ -15,7 +15,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("8c3db7d5ea580242dda3e9ab1054c150"));
Arrays.asList("989ff3afb1384b3c6c8a284b11ebb228"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("a7a342c880c81c289d903728080e3e01"));
Arrays.asList("5c58506847ddf85bfe75c3cf3babb669"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("da9fa5c1b2a141286890d5364d87cd4b"));
Arrays.asList("184fc1f99dfba3e2519d16458d728fcc"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("513984b5528fde2a835883a6e3d6d2db"));
Arrays.asList("12a069d5f1c9cd3970ea6301397219aa"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("2cedac7d2804621107e80a74ac9d01b0"));
Arrays.asList("3712b2901bece15094c1eb468dfdc5a8"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("08138975e9c32463e358b86888a84c5e"));
Arrays.asList("b7b0e9f3f4f25fd41f388a736dd7b3b8"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e2f4031fc005d96af59963bc9833ff76"));
Arrays.asList("8f42df642b329ff19bc2c39470117280"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("63c99a5e99974793850de225e3410ea6"));
Arrays.asList("908aa4b6ac65bee57f91bc6fed4d46ad"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("461e2273b26c9e9c675d1fb8a24df121"));
Arrays.asList("24234da54855c892625008fb134e3a88"));
executeTest("not passing it any reads", spec);
}
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTag() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("caa2b55ca2f256dce4b76bad41c29ec5"));
Arrays.asList("24d9649943be876e78f76bbf9ff5b501"));
executeTest("getting DB tag", spec);
}
}

View File

@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e0543c72ed36f4c0c43d791ad44aa96a"));
Arrays.asList("2cac82e304185cfceea5816f89f64773"));
executeTest("test no action", spec);
}
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// "test clustered SNPs": same vcfexample2.vcf input and 1:10,020,000-10,021,000 interval as
// the other cases visible in this class, with "-window 10" added to the command line.
// The literal 1 and the 32-hex-digit string are presumably the number of outputs and the
// expected MD5 -- confirm against WalkerTestSpec.
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("2385975931cd06fca452655bebf5c379"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("5ae28a70de7a778c50749a60b69724ee"));
executeTest("test clustered SNPs", spec);
}
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// "test mask": passes "-mask foo" and supplies vcfexample2.vcf twice -- once as
// -B mask,VCF and once as -B variant,VCF -- over the interval 1:10,020,000-10,021,000.
// The literal 1 and the 32-hex-digit string are presumably the number of outputs and the
// expected MD5 -- confirm against WalkerTestSpec.
public void testMask() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6743efa09985206819adcf8eaf5ff936"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("0404f10b3c928ee1ea240ccea6ee3cd1"));
executeTest("test mask", spec);
}
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// "test filter #1": adds the expression 'DoC < 20 || FisherStrand > 20.0' via -filter with
// -filterName foo, on vcfexample2.vcf over 1:10,020,000-10,021,000.
// The literal 1 and the 32-hex-digit string are presumably the number of outputs and the
// expected MD5 -- confirm against WalkerTestSpec.
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e054f57d3794ce8a57cae92f16886cf0"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("fa110840f477b238c0b30ed4fae5ab72"));
executeTest("test filter #1", spec);
}
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// "test filter #2": adds the expression 'AlleleBalance < 70.0 && FisherStrand == 1.4' via
// -filter with -filterName bar, on vcfexample2.vcf over 1:10,020,000-10,021,000.
// The literal 1 and the 32-hex-digit string are presumably the number of outputs and the
// expected MD5 -- confirm against WalkerTestSpec.
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("db09a1f7ff523087ea0f6a56f3febfe7"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("9a30dd4c67ddbfadc9e153e0345b46d4"));
executeTest("test filter #2", spec);
}
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// Supplies two --filterName/-filter pairs on one command line: ABF with
// 'AlleleBalance < 70.0' and FSF with 'FisherStrand == 1.4', on vcfexample2.vcf over
// 1:10,020,000-10,021,000. The label passed to executeTest is
// "test filter with separate names #2".
// The literal 1 and the 32-hex-digit string are presumably the number of outputs and the
// expected MD5 -- confirm against WalkerTestSpec.
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("edbd505d8d55b4ba71f99d7006871db0"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("7052fca252754e7a92ddb1f27123c7c8"));
executeTest("test filter with separate names #2", spec);
}
@ -64,7 +64,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// "test genotype filter #1": adds the expression 'GQ == 0.60' via -G_filter with
// -G_filterName foo, on vcfexample2.vcf over 1:10,020,000-10,021,000.
// The literal 1 and the 32-hex-digit string are presumably the number of outputs and the
// expected MD5 -- confirm against WalkerTestSpec.
public void testGenotypeFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("391cd1c96546d1760265f7924428af8f"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("e22ecae2f992fb9d5a91c286f9ac3e40"));
executeTest("test genotype filter #1", spec);
}
@ -72,7 +72,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// "test genotype filter #2": adds the expression 'AF == 0.04 && isHomVar == 1' via
// -G_filter with -G_filterName foo, on vcfexample2.vcf over 1:10,020,000-10,021,000.
// The literal 1 and the 32-hex-digit string are presumably the number of outputs and the
// expected MD5 -- confirm against WalkerTestSpec.
public void testGenotypeFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("0eb48f8cb2ad6a0e46c88a5116be2b04"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("b7e7e7abbf5b03d773f82cced33497a4"));
executeTest("test genotype filter #2", spec);
}
}

View File

@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// "testMultiSamplePilot1 - Joint Estimate": builds a full "-T UnifiedGenotyper" command line
// (reference human_b36_both.fasta, input low_coverage_CEU.chr1.10k-11k.bam, -varout %s,
// interval 1:10,022,000-10,025,000) and passes the spec to executeTest.
// The %s is presumably filled in with a temporary output path, and the literal 1 and the
// 32-hex-digit string are presumably the output count and expected MD5 -- confirm against
// WalkerTestSpec.
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("2078bb6eac35f50c346faa0b9c531539"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("3a402233264e21a84d421e3a4ea64768"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// "testMultiSamplePilot2 - Joint Estimate": builds a full "-T UnifiedGenotyper" command line
// (reference human_b36_both.fasta, input pilot2_daughters.chr20.10k-11k.bam, -varout %s,
// interval 20:10,000,000-10,050,000) and passes the spec to executeTest.
// The %s is presumably filled in with a temporary output path, and the literal 1 and the
// 32-hex-digit string are presumably the output count and expected MD5 -- confirm against
// WalkerTestSpec.
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("b72f222af1bb7212645822d196ebfc70"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("79736b3e955a16b30f827b2786fc08b1"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// "testSingleSamplePilot2 - Joint Estimate": builds a full "-T UnifiedGenotyper" command line
// (reference human_b36_both.fasta, input NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam, -varout %s,
// interval 1:10,000,000-10,100,000) and passes the spec to executeTest.
// The %s is presumably filled in with a temporary output path, and the literal 1 and the
// 32-hex-digit string are presumably the output count and expected MD5 -- confirm against
// WalkerTestSpec.
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("419751fd5f2797db30d8b4442a72613d"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("b8d93c6fcb4b17d454cdcbfc4b43f076"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParallelization() {
String md5 = "fc5798b2ef700e60fa032951bab9607d";
String md5 = "098802639cfab1b777c96d38376f118a";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
@ -90,11 +90,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "acae0a31c1f6688bad2fc7f12d66cbc7" );
e.put( "-all_bases", "45b50b072385dcbf49bb01299f208d38" );
e.put( "-genotype", "b87f28a772eb75c8dad9062c6d039da5" );
e.put( "-all_bases", "ec9de2044cd5f5901d6879b56f12993a" );
e.put( "--min_base_quality_score 26", "875c64a64fd402626e04c9540388c483" );
e.put( "--min_mapping_quality_score 26", "e1eff3777c392421eea8818c96032206" );
e.put( "--max_mismatches_in_40bp_window 5", "8b4239123bd86ccff388472e7909e186" );
e.put( "--min_mapping_quality_score 26", "de7d90e425f8f08f219dc91a25d60c68" );
e.put( "--max_mismatches_in_40bp_window 5", "758e312b2a2e7c4d83f60174d43fac8a" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -108,12 +108,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Runs two UnifiedGenotyper specs that share the same reference, NA12878 SLX BAM and
// interval 1:10,000,000-10,010,000 and differ only in the trailing option:
// spec1 ("testConfidence1") passes -stand_call_conf 10, spec2 ("testConfidence2") passes
// -stand_emit_conf 10.
// The literal 1 and the 32-hex-digit strings are presumably the output count and expected
// MD5s -- confirm against WalkerTestSpec.
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("6388be650932750426b84c973a3fc04d"));
// NOTE(review): second Arrays.asList line in a row -- looks like a diff hunk with stripped
// +/- markers; presumably the line above is the old value and the line below its replacement.
Arrays.asList("4937bab94b0bae1aa61cdf3a06cb49e8"));
executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("e3f402fbbb6bbb4f60b1aa0549989d85"));
// NOTE(review): same pattern as for spec1 -- presumably old value above, replacement below.
Arrays.asList("f8b722dad5c4868a4bba246eef83f96d"));
executeTest("testConfidence2", spec2);
}

View File

@ -118,7 +118,7 @@ public class
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s",
2,
Arrays.asList("483f821ce96f4cf571e9bba356c9f325", "d94328f4a5f7c40e95edf2ef13f38ae0"));
Arrays.asList("483f821ce96f4cf571e9bba356c9f325", "989bc30dea6c8a4cf771cd1b9fdab488"));
executeTest("testVEWriteVCF", spec);
}
}

View File

@ -37,7 +37,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testVariantRecalibrator() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "a1acb90f0695cbe33c290403113ac3e1" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "0cb94385ced8a7a537d7bc79f82c01d3" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();

View File

@ -62,7 +62,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
// Calls test1InOut on the CEU trio dindel indel-genotypes VCF. The second argument is an
// empty string -- presumably the expected MD5, left blank here; confirm against test1InOut.
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", ""); }
// Calls test1InOut on the CEU low-coverage dindel indel-genotypes VCF. The second argument is
// an empty string -- presumably the expected MD5, left blank here; confirm against test1InOut.
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", ""); }
// Calls combine2 on the CEU and YRI trio genotype files (.vcf.gz); the third argument is a
// 32-hex-digit string, presumably the expected MD5 of the combined output -- confirm against
// combine2.
// NOTE(review): the method appears twice, differing only in that string. This chunk reads
// like a diff hunk with stripped +/- markers, so presumably the first line is the old version
// and the second its replacement; only one can exist in the real file.
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "2ec9e7acff0c36c2b51b2b720944bcde"); } // official project VCF files in tabix format
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "02f292cde282ab8b0c69459335abb74f"); } // official project VCF files in tabix format
// Calls combine2 on the CEU trio genotypes file (.vcf.gz) and the CEU low-coverage dindel
// indel-genotypes VCF. The third argument is an empty string -- presumably the expected MD5,
// left blank here; confirm against combine2.
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", ""); }
// Calls combine2 on the two CEU dindel indel-genotypes VCFs (trio and low-coverage). The
// third argument is an empty string -- presumably the expected MD5, left blank here; confirm
// against combine2.
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", ""); }

View File

@ -26,7 +26,7 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
*/
String[] md5WithDashSArg = {"454609ac18f149b0175ad99b0ea2d09e"};
String[] md5WithDashSArg = {"94edacdaee0dd58508d35d4d6040e31b"};
WalkerTestSpec specWithSArg = new WalkerTestSpec(
"-T GenomicAnnotator -R " + oneKGLocation + "reference/human_b36_both.fasta " +
"-B variant,vcf,/humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf " +