Near final BCF2 implementation
-- Trivial import changes in some walkers -- SelectVariants has a new hidden mode to fully decode a VCF file -- DepthPerAlleleBySample (AD) changed to have not UNBOUNDED by A type, which is actually the right type -- GenotypeLikelihoods now implements List<Double> for convenience. The PL duality here is going to be removed in a subsequent commit -- BugFixes in BCF2Writer. Proper handling of padding. Bugfix for nFields for a field -- padAllele function in VariantContextUtils -- Much better tests for VariantContextTestProvider, including loading parts of dbSNP 135 and the Phase II 1000G call set with genotypes to test encoding / decoding of fields.
This commit is contained in:
parent
dfee17a672
commit
aaf11f00e3
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -15,10 +16,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -79,9 +77,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
||||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||||
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
|
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
|
||||||
|
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
return toADAnnotation(counts);
|
||||||
map.put(getKeyNames().get(0), counts);
|
|
||||||
return map;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
|
private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
|
||||||
|
|
@ -132,11 +128,11 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
||||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||||
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
||||||
|
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
return toADAnnotation(counts);
|
||||||
map.put(getKeyNames().get(0), counts);
|
}
|
||||||
|
|
||||||
//map.put(getKeyNames().get(0), counts);
|
private final Map<String, Object> toADAnnotation(final Integer[] counts) {
|
||||||
return map;
|
return Collections.singletonMap(getKeyNames().get(0), (Object)Arrays.asList(counts));
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getAlleleRepresentation(Allele allele) {
|
private String getAlleleRepresentation(Allele allele) {
|
||||||
|
|
@ -151,5 +147,12 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
||||||
// public String getIndelBases()
|
// public String getIndelBases()
|
||||||
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
||||||
|
|
||||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
|
public List<VCFFormatHeaderLine> getDescriptions() {
|
||||||
|
return Arrays.asList(
|
||||||
|
new VCFFormatHeaderLine(
|
||||||
|
getKeyNames().get(0),
|
||||||
|
VCFHeaderLineCount.A,
|
||||||
|
VCFHeaderLineType.Integer,
|
||||||
|
"Allelic depths for the ref and alt alleles in the order listed"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -310,6 +310,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
|
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
|
||||||
private String outMVFile = null;
|
private String outMVFile = null;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="fullyDecode", doc="If true, the incoming VariantContext will be fully decoded", required=false)
|
||||||
|
private boolean fullyDecode = false;
|
||||||
|
|
||||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||||
private class RandomVariantStructure {
|
private class RandomVariantStructure {
|
||||||
private VariantContext vc;
|
private VariantContext vc;
|
||||||
|
|
@ -357,6 +361,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
private Random randomGenotypes = new Random();
|
private Random randomGenotypes = new Random();
|
||||||
|
|
||||||
private Set<String> IDsToKeep = null;
|
private Set<String> IDsToKeep = null;
|
||||||
|
private Map<String, VCFHeader> vcfRods;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
|
|
@ -365,7 +370,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
// Get list of samples to include in the output
|
// Get list of samples to include in the output
|
||||||
List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
|
List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
|
||||||
|
|
||||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||||
|
|
||||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||||
|
|
@ -486,6 +491,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
}
|
}
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
|
if ( fullyDecode ) vc = vc.fullyDecode(vcfRods.get(vc.getSource()));
|
||||||
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
|
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
@ -724,6 +730,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC) {
|
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC) {
|
||||||
|
if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data
|
||||||
|
|
||||||
if (KEEP_ORIGINAL_CHR_COUNTS) {
|
if (KEEP_ORIGINAL_CHR_COUNTS) {
|
||||||
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||||
builder.attribute("AC_Orig", originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
builder.attribute("AC_Orig", originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||||
|
|
|
||||||
|
|
@ -323,7 +323,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
getters.put("REF", new Getter() {
|
getters.put("REF", new Getter() {
|
||||||
public String get(VariantContext vc) {
|
public String get(VariantContext vc) {
|
||||||
StringBuilder x = new StringBuilder();
|
StringBuilder x = new StringBuilder();
|
||||||
x.append(vc.getAlleleWithRefPadding(vc.getReference()));
|
x.append(vc.getAlleleStringWithRefPadding(vc.getReference()));
|
||||||
return x.toString();
|
return x.toString();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
@ -335,7 +335,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
for ( int i = 0; i < n; i++ ) {
|
for ( int i = 0; i < n; i++ ) {
|
||||||
if ( i != 0 ) x.append(",");
|
if ( i != 0 ) x.append(",");
|
||||||
x.append(vc.getAlleleWithRefPadding(vc.getAlternateAllele(i)));
|
x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i)));
|
||||||
}
|
}
|
||||||
return x.toString();
|
return x.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -377,11 +377,11 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
private static Object splitAltAlleles(VariantContext vc) {
|
private static Object splitAltAlleles(VariantContext vc) {
|
||||||
final int numAltAlleles = vc.getAlternateAlleles().size();
|
final int numAltAlleles = vc.getAlternateAlleles().size();
|
||||||
if ( numAltAlleles == 1 )
|
if ( numAltAlleles == 1 )
|
||||||
return vc.getAlleleWithRefPadding(vc.getAlternateAllele(0));
|
return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0));
|
||||||
|
|
||||||
final List<String> alleles = new ArrayList<String>(numAltAlleles);
|
final List<String> alleles = new ArrayList<String>(numAltAlleles);
|
||||||
for ( Allele allele : vc.getAlternateAlleles() )
|
for ( Allele allele : vc.getAlternateAlleles() )
|
||||||
alleles.add(vc.getAlleleWithRefPadding(allele));
|
alleles.add(vc.getAlleleStringWithRefPadding(allele));
|
||||||
return alleles;
|
return alleles;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -233,7 +233,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
public static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||||
if ( AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||||
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
||||||
return clippedAlleles;
|
return clippedAlleles;
|
||||||
|
|
|
||||||
|
|
@ -24,15 +24,16 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.variantcontext;
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.util.EnumMap;
|
import java.util.*;
|
||||||
|
|
||||||
public class GenotypeLikelihoods {
|
public class GenotypeLikelihoods implements List<Double> {
|
||||||
public static final boolean CAP_PLS = false;
|
public static final boolean CAP_PLS = false;
|
||||||
public static final int PL_CAP = 255;
|
public static final int PL_CAP = 255;
|
||||||
|
|
||||||
|
|
@ -205,6 +206,41 @@ public class GenotypeLikelihoods {
|
||||||
return s.toString();
|
return s.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// List interface functions
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private final void notImplemented() {
|
||||||
|
throw new ReviewedStingException("BUG: code not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public int size() { return this.log10Likelihoods.length; }
|
||||||
|
@Override public Double get(final int i) { return log10Likelihoods[i];}
|
||||||
|
@Override public Double set(final int i, final Double aDouble) { return log10Likelihoods[i] = aDouble; }
|
||||||
|
@Override public boolean isEmpty() { return false; }
|
||||||
|
@Override public Iterator<Double> iterator() { return Arrays.asList(ArrayUtils.toObject(log10Likelihoods)).iterator(); }
|
||||||
|
@Override public Object[] toArray() { return ArrayUtils.toObject(log10Likelihoods); }
|
||||||
|
|
||||||
|
// none of these are implemented
|
||||||
|
@Override public boolean contains(final Object o) { notImplemented(); return false; }
|
||||||
|
@Override public <T> T[] toArray(final T[] ts) { notImplemented(); return null; }
|
||||||
|
@Override public boolean add(final Double aDouble) { notImplemented(); return false; }
|
||||||
|
@Override public boolean remove(final Object o) {notImplemented(); return false; }
|
||||||
|
@Override public boolean containsAll(final Collection<?> objects) { notImplemented(); return false; }
|
||||||
|
@Override public boolean addAll(final Collection<? extends Double> doubles) { notImplemented(); return false; }
|
||||||
|
@Override public boolean addAll(final int i, final Collection<? extends Double> doubles) { notImplemented(); return false; }
|
||||||
|
@Override public boolean removeAll(final Collection<?> objects) { notImplemented(); return false; }
|
||||||
|
@Override public boolean retainAll(final Collection<?> objects) { notImplemented(); return false; }
|
||||||
|
@Override public void clear() { notImplemented(); }
|
||||||
|
@Override public void add(final int i, final Double aDouble) { notImplemented(); }
|
||||||
|
@Override public Double remove(final int i) { notImplemented(); return null; }
|
||||||
|
@Override public int indexOf(final Object o) { notImplemented(); return -1; }
|
||||||
|
@Override public int lastIndexOf(final Object o) { notImplemented(); return 0; }
|
||||||
|
@Override public ListIterator<Double> listIterator() { notImplemented(); return null; }
|
||||||
|
@Override public ListIterator<Double> listIterator(final int i) { notImplemented(); return null; }
|
||||||
|
@Override public List<Double> subList(final int i, final int i1) { notImplemented(); return null; }
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -280,7 +316,6 @@ public class GenotypeLikelihoods {
|
||||||
* @param ploidy Ploidy, or number of chromosomes in set
|
* @param ploidy Ploidy, or number of chromosomes in set
|
||||||
* @return Number of likelihood elements we need to hold.
|
* @return Number of likelihood elements we need to hold.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {
|
public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {
|
||||||
|
|
||||||
// fast, closed form solution for diploid samples (most common use case)
|
// fast, closed form solution for diploid samples (most common use case)
|
||||||
|
|
|
||||||
|
|
@ -519,13 +519,10 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
return REFERENCE_BASE_FOR_INDEL;
|
return REFERENCE_BASE_FOR_INDEL;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAlleleWithRefPadding(final Allele allele) {
|
public String getAlleleStringWithRefPadding(final Allele allele) {
|
||||||
if ( hasReferenceBaseForIndel() && isIndel() ) {
|
if ( VariantContextUtils.needsPadding(this) )
|
||||||
StringBuilder sb = new StringBuilder();
|
return VariantContextUtils.padAllele(this, allele);
|
||||||
sb.append((char)getReferenceBaseForIndel().byteValue());
|
else
|
||||||
sb.append(allele.getDisplayString());
|
|
||||||
return sb.toString();
|
|
||||||
} else
|
|
||||||
return allele.getDisplayString();
|
return allele.getDisplayString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1288,6 +1285,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
final String field = attr.getKey();
|
final String field = attr.getKey();
|
||||||
final VCFCompoundHeaderLine format = getMetaDataForField(header, field);
|
final VCFCompoundHeaderLine format = getMetaDataForField(header, field);
|
||||||
final Object decoded = decodeValue(field, attr.getValue(), format);
|
final Object decoded = decodeValue(field, attr.getValue(), format);
|
||||||
|
|
||||||
if ( decoded != null )
|
if ( decoded != null )
|
||||||
newAttributes.put(field, decoded);
|
newAttributes.put(field, decoded);
|
||||||
}
|
}
|
||||||
|
|
@ -1297,6 +1295,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
|
|
||||||
private final Object decodeValue(final String field, final Object value, final VCFCompoundHeaderLine format) {
|
private final Object decodeValue(final String field, final Object value, final VCFCompoundHeaderLine format) {
|
||||||
if ( value instanceof String ) {
|
if ( value instanceof String ) {
|
||||||
|
if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) )
|
||||||
|
return value.equals(".") ? null : new GenotypeLikelihoods((String)value);
|
||||||
|
|
||||||
final String string = (String)value;
|
final String string = (String)value;
|
||||||
if ( string.indexOf(",") != -1 ) {
|
if ( string.indexOf(",") != -1 ) {
|
||||||
final String[] splits = string.split(",");
|
final String[] splits = string.split(",");
|
||||||
|
|
@ -1307,6 +1308,12 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
} else {
|
} else {
|
||||||
return decodeOne(field, string, format);
|
return decodeOne(field, string, format);
|
||||||
}
|
}
|
||||||
|
} else if ( value instanceof List && (((List) value).get(0)) instanceof String ) {
|
||||||
|
final List<String> asList = (List<String>)value;
|
||||||
|
final List<Object> values = new ArrayList<Object>(asList.size());
|
||||||
|
for ( final String s : asList )
|
||||||
|
values.add(decodeOne(field, s, format));
|
||||||
|
return values;
|
||||||
} else {
|
} else {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
@ -1321,7 +1328,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
case Flag: return Boolean.valueOf(string);
|
case Flag: return Boolean.valueOf(string);
|
||||||
case String: return string;
|
case String: return string;
|
||||||
case Integer: return Integer.valueOf(string);
|
case Integer: return Integer.valueOf(string);
|
||||||
case Float: return Float.valueOf(string);
|
case Float: return Double.valueOf(string);
|
||||||
default: throw new ReviewedStingException("Unexpected type for field" + field);
|
default: throw new ReviewedStingException("Unexpected type for field" + field);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -197,6 +197,16 @@ public class VariantContextUtils {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String padAllele(final VariantContext vc, final Allele allele) {
|
||||||
|
assert needsPadding(vc);
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||||
|
sb.append(allele.getDisplayString());
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||||
final boolean padVC = needsPadding(inputVC);
|
final boolean padVC = needsPadding(inputVC);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,10 +32,7 @@ import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -177,8 +174,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||||
|
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
|
||||||
for ( final Allele allele : vc.getAlleles() ) {
|
for ( final Allele allele : vc.getAlleles() ) {
|
||||||
final String s = vc.getAlleleWithRefPadding(allele);
|
final String s = needsPadding ? VariantContextUtils.padAllele(vc,allele) : allele.getDisplayString();
|
||||||
encoder.encodeTyped(s, BCF2Type.CHAR);
|
encoder.encodeTyped(s, BCF2Type.CHAR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -211,6 +209,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
addGQ(vc);
|
addGQ(vc);
|
||||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||||
addGenotypeFilters(vc);
|
addGenotypeFilters(vc);
|
||||||
|
// } else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||||
|
// addPLs(vc);
|
||||||
} else {
|
} else {
|
||||||
addGenericGenotypeField(vc, field);
|
addGenericGenotypeField(vc, field);
|
||||||
}
|
}
|
||||||
|
|
@ -221,7 +221,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
|
|
||||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
||||||
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field);
|
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field);
|
||||||
int nFields = metaData.getCount(vc.getAlternateAlleles().size());
|
int nFields = metaData.getCount(vc.getNAlleles());
|
||||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -264,6 +264,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
encoder.encodeRawMissingValues(numInFormatField, encoding.BCF2Type);
|
encoder.encodeRawMissingValues(numInFormatField, encoding.BCF2Type);
|
||||||
} else {
|
} else {
|
||||||
final Object val = g.getAttribute(field);
|
final Object val = g.getAttribute(field);
|
||||||
|
if ( (val instanceof List) && (((List) val).size() != numInFormatField ))
|
||||||
|
throw new ReviewedStingException("BUG: header for " + field +
|
||||||
|
" has inconsistent number of values " + numInFormatField +
|
||||||
|
" compared to values in VariantContext " + ((List) val).size());
|
||||||
final Collection<Object> vals = numInFormatField == 1 ? Collections.singleton(val) : (Collection)val;
|
final Collection<Object> vals = numInFormatField == 1 ? Collections.singleton(val) : (Collection)val;
|
||||||
encoder.encodeRawValues(vals, encoding.BCF2Type);
|
encoder.encodeRawValues(vals, encoding.BCF2Type);
|
||||||
}
|
}
|
||||||
|
|
@ -297,7 +301,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
case Flag:
|
case Flag:
|
||||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.INT8, Collections.singletonList(1));
|
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.INT8, Collections.singletonList(1));
|
||||||
case String:
|
case String:
|
||||||
final List<String> s = isList ? (List<String>)value : Collections.singletonList((String)value);
|
final List<String> s = isList ? (List<String>)value : Collections.singletonList((String) value);
|
||||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, s);
|
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, s);
|
||||||
case Integer: // note integer calculation is a bit complex because of the need to determine sizes
|
case Integer: // note integer calculation is a bit complex because of the need to determine sizes
|
||||||
List<Integer> l;
|
List<Integer> l;
|
||||||
|
|
@ -346,6 +350,19 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// private final void addPLs(final VariantContext vc) throws IOException {
|
||||||
|
// startGenotypeField(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 1, BCF2Type.INT16);
|
||||||
|
// for ( final Genotype g : vc.getGenotypes() ) {
|
||||||
|
// if ( g.hasLog10PError() ) {
|
||||||
|
// final int GQ = (int)Math.round(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
|
||||||
|
// if ( GQ > VCFConstants.MAX_GENOTYPE_QUAL ) throw new ReviewedStingException("Unexpectedly large GQ " + GQ + " at " + vc);
|
||||||
|
// encoder.encodeRawValue(GQ, BCF2Type.INT8);
|
||||||
|
// } else {
|
||||||
|
// encoder.encodeRawMissingValues(1, BCF2Type.INT8);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
private final void addGenotypes(final VariantContext vc) throws IOException {
|
private final void addGenotypes(final VariantContext vc) throws IOException {
|
||||||
if ( vc.getNAlleles() > 127 )
|
if ( vc.getNAlleles() > 127 )
|
||||||
throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " +
|
throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " +
|
||||||
|
|
@ -390,7 +407,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
|
|
||||||
// iterate over strings until we find one that needs 16 bits, and break
|
// iterate over strings until we find one that needs 16 bits, and break
|
||||||
for ( final String string : strings ) {
|
for ( final String string : strings ) {
|
||||||
final int offset = stringDictionaryMap.get(string);
|
final Integer got = stringDictionaryMap.get(string);
|
||||||
|
if ( got == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
|
||||||
|
final int offset = got;
|
||||||
offsets.add(offset);
|
offsets.add(offset);
|
||||||
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
||||||
switch ( type1 ) {
|
switch ( type1 ) {
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,6 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|
||||||
|
|
||||||
import java.util.EnumMap;
|
import java.util.EnumMap;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,9 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
import org.broad.tribble.FeatureCodec;
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.FeatureCodecHeader;
|
import org.broad.tribble.FeatureCodecHeader;
|
||||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
@ -44,10 +46,17 @@ import java.util.*;
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
public class VariantContextTestProvider {
|
public class VariantContextTestProvider {
|
||||||
final private static boolean ADVANCED_TESTS = false;
|
final private static boolean ENABLE_VARARRAY_TESTS = false;
|
||||||
final static VCFHeader header;
|
final private static boolean ENABLE_PLOIDY_TESTS = false;
|
||||||
|
final private static boolean ENABLE_PL_TESTS = true;
|
||||||
|
private static VCFHeader syntheticHeader;
|
||||||
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
|
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
|
||||||
final static VariantContext ROOT;
|
private static VariantContext ROOT;
|
||||||
|
|
||||||
|
private final static List<File> testSourceVCFs = Arrays.asList(
|
||||||
|
new File(BaseTest.testDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"),
|
||||||
|
new File(BaseTest.testDir + "dbsnp_135.b37.1000.vcf")
|
||||||
|
);
|
||||||
|
|
||||||
public abstract static class VariantContextIOTest {
|
public abstract static class VariantContextIOTest {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
@ -67,17 +76,19 @@ public class VariantContextTestProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class VariantContextTestData {
|
public static class VariantContextTestData {
|
||||||
|
public final VCFHeader header;
|
||||||
public List<VariantContext> vcs;
|
public List<VariantContext> vcs;
|
||||||
|
|
||||||
public VariantContextTestData(final VariantContextBuilder builder) {
|
public VariantContextTestData(final VCFHeader header, final VariantContextBuilder builder) {
|
||||||
this(Collections.singletonList(builder.make()));
|
this(header, Collections.singletonList(builder.make()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContextTestData(final VariantContext vc) {
|
public VariantContextTestData(final VCFHeader header, final List<VariantContext> vcs) {
|
||||||
this(Collections.singletonList(vc));
|
final Set<String> samples = new HashSet<String>();
|
||||||
}
|
for ( final VariantContext vc : vcs )
|
||||||
|
if ( vc.hasGenotypes() )
|
||||||
public VariantContextTestData(final List<VariantContext> vcs) {
|
samples.addAll(vc.getSampleNames());
|
||||||
|
this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaData(), samples);
|
||||||
this.vcs = vcs;
|
this.vcs = vcs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -88,9 +99,11 @@ public class VariantContextTestProvider {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder b = new StringBuilder();
|
StringBuilder b = new StringBuilder();
|
||||||
b.append("VariantContextTestData: [");
|
b.append("VariantContextTestData: [");
|
||||||
for ( VariantContext vc : vcs ) {
|
final VariantContext vc = vcs.get(0);
|
||||||
b.append(vc.toString()).append(" ----- ");
|
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
}
|
builder.noGenotypes();
|
||||||
|
b.append(builder.make().toString()).append(" nGenotypes = ").append(vc.getNSamples());
|
||||||
|
if ( vcs.size() > 1 ) b.append(" ----- with another ").append(vcs.size() - 1).append(" VariantContext records");
|
||||||
b.append("]");
|
b.append("]");
|
||||||
return b.toString();
|
return b.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -101,11 +114,55 @@ public class VariantContextTestProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static void add(VariantContextBuilder builder) {
|
private final static void add(VariantContextBuilder builder) {
|
||||||
TEST_DATAs.add(new VariantContextTestData(builder));
|
TEST_DATAs.add(new VariantContextTestData(syntheticHeader, builder));
|
||||||
}
|
}
|
||||||
|
|
||||||
static {
|
public static void initializeTests() throws IOException {
|
||||||
|
createSyntheticHeader();
|
||||||
|
makeSyntheticTests();
|
||||||
|
makeEmpiricalTests();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void makeEmpiricalTests() throws IOException {
|
||||||
|
for ( final File file : testSourceVCFs ) {
|
||||||
|
VCFCodec codec = new VCFCodec();
|
||||||
|
Pair<VCFHeader, List<VariantContext>> x = readAllVCs( file, codec );
|
||||||
|
List<VariantContext> fullyDecoded = new ArrayList<VariantContext>(x.getSecond().size());
|
||||||
|
for ( final VariantContext raw : x.getSecond() )
|
||||||
|
fullyDecoded.add(raw.fullyDecode(x.getFirst()));
|
||||||
|
TEST_DATAs.add(new VariantContextTestData(x.getFirst(), x.getSecond()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void createSyntheticHeader() {
|
||||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||||
|
|
||||||
|
metaData.add(new VCFInfoHeaderLine("STRING1", 1, VCFHeaderLineType.String, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
|
||||||
|
|
||||||
|
metaData.add(new VCFInfoHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||||
|
// prep the header
|
||||||
|
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
||||||
|
|
||||||
|
metaData.add(new VCFFilterHeaderLine("FILTER1"));
|
||||||
|
metaData.add(new VCFFilterHeaderLine("FILTER2"));
|
||||||
|
|
||||||
|
metaData.add(new VCFInfoHeaderLine("INT1", 1, VCFHeaderLineType.Integer, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("INT3", 3, VCFHeaderLineType.Integer, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("INT20", 20, VCFHeaderLineType.Integer, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("FLOAT1", 1, VCFHeaderLineType.Float, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("FLOAT3", 3, VCFHeaderLineType.Float, "x"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("FLAG", 1, VCFHeaderLineType.Flag, "x"));
|
||||||
|
|
||||||
|
syntheticHeader = new VCFHeader(metaData);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void makeSyntheticTests() {
|
||||||
VariantContextBuilder rootBuilder = new VariantContextBuilder();
|
VariantContextBuilder rootBuilder = new VariantContextBuilder();
|
||||||
rootBuilder.source("test");
|
rootBuilder.source("test");
|
||||||
rootBuilder.loc("1", 10, 10);
|
rootBuilder.loc("1", 10, 10);
|
||||||
|
|
@ -126,8 +183,6 @@ public class VariantContextTestProvider {
|
||||||
add(builder().passFilters());
|
add(builder().passFilters());
|
||||||
add(builder().filters("FILTER1"));
|
add(builder().filters("FILTER1"));
|
||||||
add(builder().filters("FILTER1", "FILTER2"));
|
add(builder().filters("FILTER1", "FILTER2"));
|
||||||
metaData.add(new VCFFilterHeaderLine("FILTER1"));
|
|
||||||
metaData.add(new VCFFilterHeaderLine("FILTER2"));
|
|
||||||
|
|
||||||
add(builder().log10PError(VariantContext.NO_LOG10_PERROR));
|
add(builder().log10PError(VariantContext.NO_LOG10_PERROR));
|
||||||
add(builder().log10PError(-1));
|
add(builder().log10PError(-1));
|
||||||
|
|
@ -147,12 +202,6 @@ public class VariantContextTestProvider {
|
||||||
add(builder().attribute("INT3", Arrays.asList(100000, 200000, 300000)));
|
add(builder().attribute("INT3", Arrays.asList(100000, 200000, 300000)));
|
||||||
add(builder().attribute("INT3", null));
|
add(builder().attribute("INT3", null));
|
||||||
add(builder().attribute("INT20", Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)));
|
add(builder().attribute("INT20", Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)));
|
||||||
metaData.add(new VCFInfoHeaderLine("INT1", 1, VCFHeaderLineType.Integer, "x"));
|
|
||||||
metaData.add(new VCFInfoHeaderLine("INT3", 3, VCFHeaderLineType.Integer, "x"));
|
|
||||||
metaData.add(new VCFInfoHeaderLine("INT20", 20, VCFHeaderLineType.Integer, "x"));
|
|
||||||
|
|
||||||
|
|
||||||
metaData.add(new VCFInfoHeaderLine("INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
|
|
||||||
|
|
||||||
add(builder().attribute("FLOAT1", 1.0));
|
add(builder().attribute("FLOAT1", 1.0));
|
||||||
add(builder().attribute("FLOAT1", 100.0));
|
add(builder().attribute("FLOAT1", 100.0));
|
||||||
|
|
@ -163,32 +212,17 @@ public class VariantContextTestProvider {
|
||||||
add(builder().attribute("FLOAT3", Arrays.asList(1000.0, 2000.0, 3000.0)));
|
add(builder().attribute("FLOAT3", Arrays.asList(1000.0, 2000.0, 3000.0)));
|
||||||
add(builder().attribute("FLOAT3", Arrays.asList(100000.0, 200000.0, 300000.0)));
|
add(builder().attribute("FLOAT3", Arrays.asList(100000.0, 200000.0, 300000.0)));
|
||||||
add(builder().attribute("FLOAT3", null));
|
add(builder().attribute("FLOAT3", null));
|
||||||
metaData.add(new VCFInfoHeaderLine("FLOAT1", 1, VCFHeaderLineType.Float, "x"));
|
|
||||||
metaData.add(new VCFInfoHeaderLine("FLOAT3", 3, VCFHeaderLineType.Float, "x"));
|
|
||||||
|
|
||||||
add(builder().attribute("FLAG", true));
|
add(builder().attribute("FLAG", true));
|
||||||
add(builder().attribute("FLAG", false));
|
add(builder().attribute("FLAG", false));
|
||||||
metaData.add(new VCFInfoHeaderLine("FLAG", 1, VCFHeaderLineType.Flag, "x"));
|
|
||||||
|
|
||||||
add(builder().attribute("STRING1", "s1"));
|
add(builder().attribute("STRING1", "s1"));
|
||||||
add(builder().attribute("STRING1", null));
|
add(builder().attribute("STRING1", null));
|
||||||
add(builder().attribute("STRING3", Arrays.asList("s1", "s2", "s3")));
|
add(builder().attribute("STRING3", Arrays.asList("s1", "s2", "s3")));
|
||||||
add(builder().attribute("STRING3", null));
|
add(builder().attribute("STRING3", null));
|
||||||
add(builder().attribute("STRING20", Arrays.asList("s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20")));
|
add(builder().attribute("STRING20", Arrays.asList("s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20")));
|
||||||
metaData.add(new VCFInfoHeaderLine("STRING1", 1, VCFHeaderLineType.String, "x"));
|
|
||||||
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
|
|
||||||
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
|
|
||||||
|
|
||||||
metaData.add(new VCFInfoHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
|
|
||||||
metaData.add(new VCFInfoHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
|
||||||
metaData.add(new VCFInfoHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
|
||||||
|
|
||||||
addGenotypesToTestData();
|
addGenotypesToTestData();
|
||||||
|
|
||||||
// prep the header
|
|
||||||
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
|
||||||
|
|
||||||
header = new VCFHeader(metaData);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addGenotypesToTestData() {
|
private static void addGenotypesToTestData() {
|
||||||
|
|
@ -250,7 +284,7 @@ public class VariantContextTestProvider {
|
||||||
addGenotypeTests(site, homRef, het, homVar);
|
addGenotypeTests(site, homRef, het, homVar);
|
||||||
|
|
||||||
// ploidy
|
// ploidy
|
||||||
if ( ADVANCED_TESTS ) {
|
if ( ENABLE_PLOIDY_TESTS ) {
|
||||||
addGenotypeTests(site,
|
addGenotypeTests(site,
|
||||||
new Genotype("dip", Arrays.asList(ref, alt1)),
|
new Genotype("dip", Arrays.asList(ref, alt1)),
|
||||||
new Genotype("hap", Arrays.asList(ref)));
|
new Genotype("hap", Arrays.asList(ref)));
|
||||||
|
|
@ -261,7 +295,7 @@ public class VariantContextTestProvider {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ADVANCED_TESTS ) {
|
if ( ENABLE_PL_TESTS ) {
|
||||||
// testing PLs
|
// testing PLs
|
||||||
addGenotypeTests(site,
|
addGenotypeTests(site,
|
||||||
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2}),
|
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2}),
|
||||||
|
|
@ -294,7 +328,7 @@ public class VariantContextTestProvider {
|
||||||
attr("g1", ref, "INT3", 1, 2, 3),
|
attr("g1", ref, "INT3", 1, 2, 3),
|
||||||
attr("g2", ref, "INT3"));
|
attr("g2", ref, "INT3"));
|
||||||
|
|
||||||
if ( ADVANCED_TESTS ) {
|
if (ENABLE_VARARRAY_TESTS) {
|
||||||
addGenotypeTests(site,
|
addGenotypeTests(site,
|
||||||
attr("g1", ref, "INT.VAR", 1, 2, 3),
|
attr("g1", ref, "INT.VAR", 1, 2, 3),
|
||||||
attr("g2", ref, "INT.VAR", 4, 5),
|
attr("g2", ref, "INT.VAR", 4, 5),
|
||||||
|
|
@ -331,13 +365,6 @@ public class VariantContextTestProvider {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static VCFHeader getHeader(final List<VariantContext> vcs) {
|
|
||||||
final Set<String> samples = new HashSet<String>();
|
|
||||||
for ( final VariantContext vc : vcs )
|
|
||||||
samples.addAll(vc.getSampleNames());
|
|
||||||
return new VCFHeader(header.getMetaData(), samples);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static List<VariantContextTestData> generateSiteTests() {
|
public static List<VariantContextTestData> generateSiteTests() {
|
||||||
return TEST_DATAs;
|
return TEST_DATAs;
|
||||||
}
|
}
|
||||||
|
|
@ -351,31 +378,37 @@ public class VariantContextTestProvider {
|
||||||
// write
|
// write
|
||||||
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||||
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
||||||
writer.writeHeader(VariantContextTestProvider.getHeader(data.vcs));
|
writer.writeHeader(data.header);
|
||||||
final List<VariantContext> expected = data.vcs;
|
final List<VariantContext> expected = data.vcs;
|
||||||
for ( VariantContext vc : expected )
|
for ( VariantContext vc : expected )
|
||||||
writer.add(vc);
|
writer.add(vc);
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
// read in the features
|
final List<VariantContext> actual = readAllVCs(tmpFile, tester.makeCodec()).getSecond();
|
||||||
FeatureCodec<VariantContext> codec = tester.makeCodec();
|
|
||||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
|
||||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
|
||||||
pbs.close();
|
|
||||||
// TODO -- test header quality
|
|
||||||
|
|
||||||
pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
|
||||||
pbs.skip(header.getHeaderEnd());
|
|
||||||
|
|
||||||
final List<VariantContext> actual = new ArrayList<VariantContext>(expected.size());
|
|
||||||
while ( ! pbs.isDone() ) { actual.add(codec.decode(pbs)); };
|
|
||||||
|
|
||||||
Assert.assertEquals(actual.size(), expected.size());
|
Assert.assertEquals(actual.size(), expected.size());
|
||||||
|
|
||||||
for ( int i = 0; i < expected.size(); i++ )
|
for ( int i = 0; i < expected.size(); i++ )
|
||||||
VariantContextTestProvider.assertEquals(actual.get(i), expected.get(i));
|
VariantContextTestProvider.assertEquals(actual.get(i), expected.get(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final static Pair<VCFHeader, List<VariantContext>> readAllVCs( final File source, final FeatureCodec<VariantContext> codec ) throws IOException {
|
||||||
|
// read in the features
|
||||||
|
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||||
|
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||||
|
pbs.close();
|
||||||
|
|
||||||
|
pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||||
|
pbs.skip(header.getHeaderEnd());
|
||||||
|
|
||||||
|
final List<VariantContext> read = new ArrayList<VariantContext>();
|
||||||
|
while ( ! pbs.isDone() ) {
|
||||||
|
final VariantContext vc = codec.decode(pbs);
|
||||||
|
if ( vc != null ) read.add(vc);
|
||||||
|
};
|
||||||
|
|
||||||
|
return new Pair<VCFHeader, List<VariantContext>>((VCFHeader)header.getHeaderValue(), read);
|
||||||
|
}
|
||||||
|
|
||||||
public static void assertEquals( final VariantContext actual, final VariantContext expected ) {
|
public static void assertEquals( final VariantContext actual, final VariantContext expected ) {
|
||||||
Assert.assertNotNull(actual);
|
Assert.assertNotNull(actual);
|
||||||
Assert.assertEquals(actual.getChr(), expected.getChr());
|
Assert.assertEquals(actual.getChr(), expected.getChr());
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,7 @@ public class VariantContextWritersUnitTest extends BaseTest {
|
||||||
public void before() throws IOException {
|
public void before() throws IOException {
|
||||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||||
dictionary = seq.getSequenceDictionary();
|
dictionary = seq.getSequenceDictionary();
|
||||||
|
VariantContextTestProvider.initializeTests();
|
||||||
}
|
}
|
||||||
|
|
||||||
@DataProvider(name = "VariantContextTest_SingleContexts")
|
@DataProvider(name = "VariantContextTest_SingleContexts")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue