Efficient Genotype object Intermediate commit
-- Created a new Genotype interface with a more limited set of operations -- Old genotype object is now SlowGenotype. New genotype object is FastGenotype. They can be used interchangeably -- There's no way to create Genotypes directly any longer. You have to use GenotypeBuilder just like VariantContextBuilder -- Modified lots and lots of code to use GenotypeBuilder -- Added a temporary hidden argument to engine to use FastGenotype by default. Current default is SlowGenotype -- Lots of bug fixes to BCF2 codec and encoder. -- Feature additions -- Now properly handles BCF2 -> BCF2 without decoding or encoding from scratch the BCF2 genotype bytes -- Cleaned up semantics of subContextFromSamples. There's one function that either re-derives the alleles from the subsetted genotypes or leaves them as-is -- MASSIVE BUGFIX in SelectVariants. The code had been decoding genotypes always, even if you were not subsetting down samples. Fixed!
This commit is contained in:
parent
a648b5e65e
commit
d37a8a0bc8
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
|
@ -221,6 +222,10 @@ public class GenomeAnalysisEngine {
|
|||
if (this.getArguments().nonDeterministicRandomSeed)
|
||||
resetRandomGenerator(System.currentTimeMillis());
|
||||
|
||||
// TODO -- REMOVE ME WHEN WE STOP BCF testing
|
||||
if ( this.getArguments().USE_FAST_GENOTYPES )
|
||||
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = true;
|
||||
|
||||
// if the user specified an input BQSR recalibration table then enable on the fly recalibration
|
||||
if (this.getArguments().BQSR_RECAL_FILE != null)
|
||||
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels);
|
||||
|
|
|
|||
|
|
@ -336,6 +336,11 @@ public class GATKArgumentCollection {
|
|||
public boolean generateShadowBCF = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
@Argument(fullName="useFastGenotypes",shortName = "useFastGenotypes",doc="",required=false)
|
||||
@Hidden
|
||||
public boolean USE_FAST_GENOTYPES = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
/**
|
||||
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
|
||||
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
Collection<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
Genotype call = new Genotype(name, genotypeAlleles);
|
||||
Genotype call = GenotypeBuilder.create(name, genotypeAlleles);
|
||||
|
||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
|
|
@ -344,7 +344,7 @@ public class VariantContextAdaptors {
|
|||
alleles.add(allele2);
|
||||
}
|
||||
|
||||
Genotype g = new Genotype(samples[i], myAlleles);
|
||||
Genotype g = GenotypeBuilder.create(samples[i], myAlleles);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
|
||||
// we need to add counts in the correct order
|
||||
Integer[] counts = new Integer[alleleCounts.size()];
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
|
||||
|
|
@ -124,7 +124,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
}
|
||||
|
||||
Integer[] counts = new Integer[alleleCounts.size()];
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(REF_ALLELE);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
||||
|
|
@ -132,8 +132,8 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
return toADAnnotation(counts);
|
||||
}
|
||||
|
||||
private final Map<String, Object> toADAnnotation(final Integer[] counts) {
|
||||
return Collections.singletonMap(getKeyNames().get(0), (Object)Arrays.asList(counts));
|
||||
private final Map<String, Object> toADAnnotation(final int[] counts) {
|
||||
return Collections.singletonMap(getKeyNames().get(0), (Object)counts);
|
||||
}
|
||||
|
||||
private String getAlleleRepresentation(Allele allele) {
|
||||
|
|
|
|||
|
|
@ -272,13 +272,13 @@ public class VariantAnnotatorEngine {
|
|||
continue;
|
||||
}
|
||||
|
||||
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getAttributes());
|
||||
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getExtendedAttributes());
|
||||
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
Map<String, Object> result = annotation.annotate(tracker, walker, ref, context, vc, genotype);
|
||||
if ( result != null )
|
||||
genotypeAnnotations.putAll(result);
|
||||
}
|
||||
genotypes.add(new Genotype(genotype.getSampleName(), genotype.getAlleles(), genotype.getLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
|
||||
genotypes.add(new GenotypeBuilder(genotype).attributes(genotypeAnnotations).make());
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
|
|
|
|||
|
|
@ -204,8 +204,6 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
for ( final Genotype g : vc_input.getGenotypes() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
|
||||
boolean genotypeIsPhased = true;
|
||||
String sample = g.getSampleName();
|
||||
|
||||
|
|
@ -271,7 +269,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
// Compute new GQ field = -10*log10Pr(Genotype call is wrong)
|
||||
// Beagle gives probability that genotype is AA, AB and BB.
|
||||
// Which, by definition, are prob of hom ref, het and hom var.
|
||||
Double probWrongGenotype, genotypeQuality;
|
||||
double probWrongGenotype, genotypeQuality;
|
||||
Double homRefProbability = Double.valueOf(beagleProbabilities.get(0));
|
||||
Double hetProbability = Double.valueOf(beagleProbabilities.get(1));
|
||||
Double homVarProbability = Double.valueOf(beagleProbabilities.get(2));
|
||||
|
|
@ -300,7 +298,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else
|
||||
genotypeQuality = log10(probWrongGenotype);
|
||||
|
||||
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getAttributes());
|
||||
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getExtendedAttributes());
|
||||
|
||||
// get original encoding and add to genotype attributes
|
||||
String a1, a2, og;
|
||||
|
|
@ -328,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else {
|
||||
originalAttributes.put("OG",".");
|
||||
}
|
||||
Genotype imputedGenotype = new Genotype(g.getSampleName(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased);
|
||||
Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).GQ(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
|
||||
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
|
||||
beagleVarCounts++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,10 +36,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -260,7 +257,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
|
||||
|
||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
||||
vcb.filters(statusesToStrings(stats.callableStatuses(thresholds)));
|
||||
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
|
||||
|
||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
||||
|
|
@ -270,21 +267,20 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
||||
}
|
||||
for (String sample : samples) {
|
||||
Map<String, Object> infos = new HashMap<String, Object>();
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
infos.put(VCFConstants.DEPTH_KEY, sampleStat.averageCoverage());
|
||||
infos.put("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
infos.put("MED", sampleStat.getQuantileDepth(0.50));
|
||||
infos.put("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
gb.DP((int)sampleStat.averageCoverage());
|
||||
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
|
||||
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
||||
}
|
||||
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
|
||||
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
||||
|
|
@ -299,8 +295,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
* @param statuses the set of statuses to be converted
|
||||
* @return a matching set of strings
|
||||
*/
|
||||
private Set<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
Set<String> output = new HashSet<String>(statuses.size());
|
||||
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
List<String> output = new ArrayList<String>(statuses.size());
|
||||
|
||||
for (CallableStatus status : statuses)
|
||||
output.add(status.name());
|
||||
|
|
|
|||
|
|
@ -79,14 +79,12 @@ class SampleStatistics {
|
|||
* @return the callable statuses of the entire sample
|
||||
*/
|
||||
public Set<CallableStatus> getCallableStatuses(ThresHolder thresholds) {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// We check if reads are present to prevent div / 0 exceptions
|
||||
if (nReads == 0) {
|
||||
output.add(CallableStatus.NO_READS);
|
||||
return output;
|
||||
return Collections.singleton(CallableStatus.NO_READS);
|
||||
}
|
||||
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
Map<CallableStatus, Double> totals = new HashMap<CallableStatus, Double>(CallableStatus.values().length);
|
||||
|
||||
// initialize map
|
||||
|
|
@ -126,6 +124,7 @@ class SampleStatistics {
|
|||
if (output.isEmpty()) {
|
||||
output.add(CallableStatus.PASS);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,11 +29,13 @@ import org.broad.tribble.AbstractFeatureReader;
|
|||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -109,9 +111,12 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
for (Genotype g : vc.getGenotypes() ) {
|
||||
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
|
||||
gRoot.add("GT", g.getGenotypeString());
|
||||
gRoot.add("GQ", g.hasLog10PError() ? g.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4 );
|
||||
if ( g.hasGQ() ) gRoot.add("GQ", g.getGQ() );
|
||||
if ( g.hasDP() ) gRoot.add("DP", g.getDP() );
|
||||
if ( g.hasAD() ) gRoot.add("AD", Utils.join(",", g.getAD()));
|
||||
if ( g.hasPL() ) gRoot.add("PL", Utils.join(",", g.getPL()));
|
||||
|
||||
for (Map.Entry<String, Object> attribute : g.getAttributes().entrySet()) {
|
||||
for (Map.Entry<String, Object> attribute : g.getExtendedAttributes().entrySet()) {
|
||||
if ( ! attribute.getKey().startsWith("_") )
|
||||
gRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -297,13 +297,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
// for each genotype, check filters then create a new object
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.isCalled() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
List<String> filters = new ArrayList<String>(g.getFilters());
|
||||
|
||||
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
||||
if ( VariantContextUtils.match(vc, g, exp) )
|
||||
filters.add(exp.name);
|
||||
}
|
||||
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), filters, g.getAttributes(), g.isPhased()));
|
||||
|
||||
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
|
||||
} else {
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,13 +141,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
if (context.hasBasePileup()) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
|
||||
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
b.PL(genotypeLikelihoods);
|
||||
b.DP(getFilteredDepth(pileup));
|
||||
genotypes.add(b.make());
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
|
||||
|
|
|
|||
|
|
@ -158,12 +158,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
myLikelihoods[i] = allLikelihoods[PLordering[i]];
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
|
||||
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, sampleData.depth);
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sampleData.name, noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleData.name);
|
||||
final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true);
|
||||
gb.PL(genotypeLikelihoods);
|
||||
gb.DP(sampleData.depth);
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).make();
|
||||
|
|
|
|||
|
|
@ -1148,13 +1148,12 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( String sample : normalSamples ) {
|
||||
|
||||
Map<String,Object> attrs = call.makeStatsAttributes(null);
|
||||
|
||||
if ( ! discard_event ) // we made a call - put actual het genotype here:
|
||||
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
gb.attributes(call.makeStatsAttributes(null));
|
||||
gb.alleles(! discard_event
|
||||
? alleles // we made a call - put actual het genotype here:
|
||||
: homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.add(gb.make());
|
||||
|
||||
}
|
||||
Set<String> filters = null;
|
||||
|
|
@ -1238,11 +1237,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
genotypes.add(new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsNormal,false));
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal));
|
||||
}
|
||||
|
||||
for ( String sample : tumorSamples ) {
|
||||
genotypes.add(new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsTumor,false) );
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor));
|
||||
}
|
||||
|
||||
Set<String> filters = null;
|
||||
|
|
|
|||
|
|
@ -97,10 +97,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
private ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||
|
||||
//Matrix of priors for all genotype combinations
|
||||
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> mvCountMatrix;
|
||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix;
|
||||
|
||||
//Matrix of allele transmission
|
||||
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>> transmissionMatrix;
|
||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>> transmissionMatrix;
|
||||
|
||||
//Metrics counters hash keys
|
||||
private final Byte NUM_TRIO_GENOTYPES_CALLED = 0;
|
||||
|
|
@ -138,17 +138,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
private EnumMap<FamilyMember,Genotype> trioPhasedGenotypes = new EnumMap<FamilyMember, Genotype>(FamilyMember.class);
|
||||
|
||||
private ArrayList<Allele> getAlleles(Genotype.Type genotype){
|
||||
private ArrayList<Allele> getAlleles(GenotypeType genotype){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
if(genotype == Genotype.Type.HOM_REF){
|
||||
if(genotype == GenotypeType.HOM_REF){
|
||||
alleles.add(REF);
|
||||
alleles.add(REF);
|
||||
}
|
||||
else if(genotype == Genotype.Type.HET){
|
||||
else if(genotype == GenotypeType.HET){
|
||||
alleles.add(REF);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
else if(genotype == Genotype.Type.HOM_VAR){
|
||||
else if(genotype == GenotypeType.HOM_VAR){
|
||||
alleles.add(VAR);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
|
|
@ -158,27 +158,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
return alleles;
|
||||
}
|
||||
|
||||
private boolean isPhasable(Genotype.Type genotype){
|
||||
return genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HET || genotype == Genotype.Type.HOM_VAR;
|
||||
private boolean isPhasable(GenotypeType genotype){
|
||||
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
//Create a new Genotype based on information from a single individual
|
||||
//Homozygous genotypes will be set as phased, heterozygous won't be
|
||||
private void phaseSingleIndividualAlleles(Genotype.Type genotype, FamilyMember familyMember){
|
||||
if(genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HOM_VAR){
|
||||
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME, getAlleles(genotype), Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
}
|
||||
else
|
||||
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME,getAlleles(genotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
private void phaseSingleIndividualAlleles(GenotypeType genotype, FamilyMember familyMember){
|
||||
boolean phase = genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HOM_VAR;
|
||||
trioPhasedGenotypes.put(familyMember, makeGenotype(genotype, phase));
|
||||
}
|
||||
|
||||
private Genotype makeGenotype(final GenotypeType type, boolean phase) {
|
||||
return makeGenotype(getAlleles(type), phase);
|
||||
}
|
||||
|
||||
private Genotype makeGenotype(final List<Allele> alleles, boolean phase) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_NAME, alleles);
|
||||
gb.phased(phase);
|
||||
return gb.make();
|
||||
}
|
||||
|
||||
//Find the phase for a parent/child pair
|
||||
private void phasePairAlleles(Genotype.Type parentGenotype, Genotype.Type childGenotype, FamilyMember parent){
|
||||
private void phasePairAlleles(GenotypeType parentGenotype, GenotypeType childGenotype, FamilyMember parent){
|
||||
|
||||
//Special case for Het/Het as it is ambiguous
|
||||
if(parentGenotype == Genotype.Type.HET && childGenotype == Genotype.Type.HET){
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, getAlleles(parentGenotype), Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
if(parentGenotype == GenotypeType.HET && childGenotype == GenotypeType.HET){
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -190,34 +197,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//If there is a possible phasing between the parent and child => phase
|
||||
int childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(0));
|
||||
if(childTransmittedAlleleIndex > -1){
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentAlleles, true));
|
||||
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
|
||||
if(parent.equals(FamilyMember.MOTHER))
|
||||
childPhasedAlleles.add(childAlleles.get(0));
|
||||
else
|
||||
childPhasedAlleles.add(0,childAlleles.get(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
|
||||
}
|
||||
else if((childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(1))) > -1){
|
||||
parentPhasedAlleles.add(parentAlleles.get(1));
|
||||
parentPhasedAlleles.add(parentAlleles.get(0));
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentPhasedAlleles, true));
|
||||
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
|
||||
if(parent.equals(FamilyMember.MOTHER))
|
||||
childPhasedAlleles.add(childAlleles.get(0));
|
||||
else
|
||||
childPhasedAlleles.add(0,childAlleles.get(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
|
||||
}
|
||||
//This is a Mendelian Violation => Do not phase
|
||||
else{
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME,getAlleles(parentGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
|
||||
}
|
||||
}
|
||||
|
||||
//Phases a family by transmission
|
||||
private void phaseFamilyAlleles(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
private void phaseFamilyAlleles(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
Set<ArrayList<Allele>> possiblePhasedChildGenotypes = new HashSet<ArrayList<Allele>>();
|
||||
ArrayList<Allele> motherAlleles = getAlleles(mother);
|
||||
|
|
@ -246,7 +253,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
motherPhasedAlleles.add(motherAlleles.get(0));
|
||||
else
|
||||
motherPhasedAlleles.add(motherAlleles.get(1));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,motherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(motherPhasedAlleles, true));
|
||||
|
||||
//Create father's genotype
|
||||
ArrayList<Allele> fatherPhasedAlleles = new ArrayList<Allele>(2);
|
||||
|
|
@ -255,10 +262,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
fatherPhasedAlleles.add(fatherAlleles.get(0));
|
||||
else
|
||||
fatherPhasedAlleles.add(fatherAlleles.get(1));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,fatherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(fatherPhasedAlleles,true));
|
||||
|
||||
//Create child's genotype
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,childPhasedAllelesAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAllelesAlleles,true));
|
||||
|
||||
//Once a phased combination is found; exit
|
||||
return;
|
||||
|
|
@ -266,16 +273,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
}
|
||||
|
||||
//If this is reached then no phasing could be found
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,getAlleles(mother),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,getAlleles(father),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(child),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(mother,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(father,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(child,false));
|
||||
}
|
||||
|
||||
/* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
|
||||
If one or more genotypes are set as NO_CALL or UNAVAILABLE, it will phase them like a pair
|
||||
or single individual.
|
||||
*/
|
||||
public TrioPhase(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
public TrioPhase(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
//Take care of cases where one or more family members are no call
|
||||
if(!isPhasable(child)){
|
||||
|
|
@ -297,7 +304,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
|
||||
}
|
||||
//Special case for Het/Het/Het as it is ambiguous
|
||||
else if(mother == Genotype.Type.HET && father == Genotype.Type.HET && child == Genotype.Type.HET){
|
||||
else if(mother == GenotypeType.HET && father == GenotypeType.HET && child == GenotypeType.HET){
|
||||
phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER);
|
||||
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
|
||||
phaseSingleIndividualAlleles(child, FamilyMember.CHILD);
|
||||
|
|
@ -311,7 +318,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
if(fatherFAlleleFirst && trioPhasedGenotypes.get(FamilyMember.CHILD).isPhased()){
|
||||
ArrayList<Allele> childAlleles = new ArrayList<Allele>(trioPhasedGenotypes.get(FamilyMember.CHILD).getAlleles());
|
||||
childAlleles.add(childAlleles.remove(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD,new Genotype(DUMMY_NAME,childAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD,makeGenotype(childAlleles,true));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -347,7 +354,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Add the transmission probability
|
||||
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getAttributes());
|
||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
||||
|
||||
|
|
@ -370,7 +377,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
else
|
||||
log10Error = genotype.getLikelihoods().getLog10GQ(phasedGenotype.getType());
|
||||
|
||||
return new Genotype(genotype.getSampleName(), phasedAlleles, log10Error, null, genotypeAttributes, phasedGenotype.isPhased());
|
||||
return new GenotypeBuilder(genotype).alleles(phasedAlleles)
|
||||
.GQ(log10Error)
|
||||
.attributes(genotypeAttributes)
|
||||
.phased(phasedGenotype.isPhased()).make();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -438,15 +448,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Create the transmission matrices
|
||||
private void buildMatrices(){
|
||||
mvCountMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
transmissionMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mother : Genotype.Type.values()){
|
||||
mvCountMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
transmissionMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>(Genotype.Type.class));
|
||||
for(Genotype.Type father : Genotype.Type.values()){
|
||||
mvCountMatrix.get(mother).put(father,new EnumMap<Genotype.Type, Integer>(Genotype.Type.class));
|
||||
transmissionMatrix.get(mother).put(father,new EnumMap<Genotype.Type,TrioPhase>(Genotype.Type.class));
|
||||
for(Genotype.Type child : Genotype.Type.values()){
|
||||
mvCountMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
transmissionMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>>(GenotypeType.class);
|
||||
for(GenotypeType mother : GenotypeType.values()){
|
||||
mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||
transmissionMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>(GenotypeType.class));
|
||||
for(GenotypeType father : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class));
|
||||
transmissionMatrix.get(mother).put(father,new EnumMap<GenotypeType,TrioPhase>(GenotypeType.class));
|
||||
for(GenotypeType child : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
|
||||
transmissionMatrix.get(mother).get(father).put(child,new TrioPhase(mother,father,child));
|
||||
}
|
||||
|
|
@ -457,16 +467,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//Returns the number of Mendelian Violations for a given genotype combination.
|
||||
//If one of the parents genotype is missing, it will consider it as a parent/child pair
|
||||
//If the child genotype or both parents genotypes are missing, 0 is returned.
|
||||
private int getCombinationMVCount(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
private int getCombinationMVCount(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
//Child is no call => No MV
|
||||
if(child == Genotype.Type.NO_CALL || child == Genotype.Type.UNAVAILABLE)
|
||||
if(child == GenotypeType.NO_CALL || child == GenotypeType.UNAVAILABLE)
|
||||
return 0;
|
||||
//Add parents with genotypes for the evaluation
|
||||
ArrayList<Genotype.Type> parents = new ArrayList<Genotype.Type>();
|
||||
if (!(mother == Genotype.Type.NO_CALL || mother == Genotype.Type.UNAVAILABLE))
|
||||
ArrayList<GenotypeType> parents = new ArrayList<GenotypeType>();
|
||||
if (!(mother == GenotypeType.NO_CALL || mother == GenotypeType.UNAVAILABLE))
|
||||
parents.add(mother);
|
||||
if (!(father == Genotype.Type.NO_CALL || father == Genotype.Type.UNAVAILABLE))
|
||||
if (!(father == GenotypeType.NO_CALL || father == GenotypeType.UNAVAILABLE))
|
||||
parents.add(father);
|
||||
|
||||
//Both parents no calls => No MV
|
||||
|
|
@ -477,35 +487,35 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
int parentsNumRefAlleles = 0;
|
||||
int parentsNumAltAlleles = 0;
|
||||
|
||||
for(Genotype.Type parent : parents){
|
||||
if(parent == Genotype.Type.HOM_REF){
|
||||
for(GenotypeType parent : parents){
|
||||
if(parent == GenotypeType.HOM_REF){
|
||||
parentsNumRefAlleles++;
|
||||
}
|
||||
else if(parent == Genotype.Type.HET){
|
||||
else if(parent == GenotypeType.HET){
|
||||
parentsNumRefAlleles++;
|
||||
parentsNumAltAlleles++;
|
||||
}
|
||||
else if(parent == Genotype.Type.HOM_VAR){
|
||||
else if(parent == GenotypeType.HOM_VAR){
|
||||
parentsNumAltAlleles++;
|
||||
}
|
||||
}
|
||||
|
||||
//Case Child is HomRef
|
||||
if(child == Genotype.Type.HOM_REF){
|
||||
if(child == GenotypeType.HOM_REF){
|
||||
if(parentsNumRefAlleles == parents.size())
|
||||
return 0;
|
||||
else return (parents.size()-parentsNumRefAlleles);
|
||||
}
|
||||
|
||||
//Case child is HomVar
|
||||
if(child == Genotype.Type.HOM_VAR){
|
||||
if(child == GenotypeType.HOM_VAR){
|
||||
if(parentsNumAltAlleles == parents.size())
|
||||
return 0;
|
||||
else return parents.size()-parentsNumAltAlleles;
|
||||
}
|
||||
|
||||
//Case child is Het
|
||||
if(child == Genotype.Type.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
|
||||
if(child == GenotypeType.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
|
||||
return 0;
|
||||
|
||||
//MV
|
||||
|
|
@ -513,7 +523,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
}
|
||||
|
||||
//Given two trio genotypes combinations, returns the number of different genotypes between the two combinations.
|
||||
private int countFamilyGenotypeDiff(Genotype.Type motherOriginal,Genotype.Type fatherOriginal,Genotype.Type childOriginal,Genotype.Type motherNew,Genotype.Type fatherNew,Genotype.Type childNew){
|
||||
private int countFamilyGenotypeDiff(GenotypeType motherOriginal,GenotypeType fatherOriginal,GenotypeType childOriginal,GenotypeType motherNew,GenotypeType fatherNew,GenotypeType childNew){
|
||||
int count = 0;
|
||||
if(motherOriginal!=motherNew)
|
||||
count++;
|
||||
|
|
@ -526,21 +536,21 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Get a Map of genotype likelihoods.
|
||||
//In case of null, unavailable or no call, all likelihoods are 1/3.
|
||||
private EnumMap<Genotype.Type,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
if(genotype == null || !genotype.isCalled()){
|
||||
EnumMap<Genotype.Type,Double> likelihoods = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
|
||||
likelihoods.put(Genotype.Type.HOM_REF,1.0/3.0);
|
||||
likelihoods.put(Genotype.Type.HET,1.0/3.0);
|
||||
likelihoods.put(Genotype.Type.HOM_VAR,1.0/3.0);
|
||||
EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HET,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0);
|
||||
return likelihoods;
|
||||
}
|
||||
return genotype.getLikelihoods().getAsMap(true);
|
||||
}
|
||||
|
||||
//Returns the Genotype.Type; returns UNVAILABLE if given null
|
||||
private Genotype.Type getTypeSafeNull(Genotype genotype){
|
||||
//Returns the GenotypeType; returns UNVAILABLE if given null
|
||||
private GenotypeType getTypeSafeNull(Genotype genotype){
|
||||
if(genotype == null)
|
||||
return Genotype.Type.UNAVAILABLE;
|
||||
return GenotypeType.UNAVAILABLE;
|
||||
return genotype.getType();
|
||||
}
|
||||
|
||||
|
|
@ -561,18 +571,18 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//Always assign the first parent as the parent having genotype information in pairs
|
||||
//Always assign the mother as the first parent in trios
|
||||
int parentsCalled = 0;
|
||||
Map<Genotype.Type,Double> firstParentLikelihoods;
|
||||
Map<Genotype.Type,Double> secondParentLikelihoods;
|
||||
ArrayList<Genotype.Type> bestFirstParentGenotype = new ArrayList<Genotype.Type>();
|
||||
ArrayList<Genotype.Type> bestSecondParentGenotype = new ArrayList<Genotype.Type>();
|
||||
ArrayList<Genotype.Type> bestChildGenotype = new ArrayList<Genotype.Type>();
|
||||
Genotype.Type pairSecondParentGenotype = null;
|
||||
Map<GenotypeType,Double> firstParentLikelihoods;
|
||||
Map<GenotypeType,Double> secondParentLikelihoods;
|
||||
ArrayList<GenotypeType> bestFirstParentGenotype = new ArrayList<GenotypeType>();
|
||||
ArrayList<GenotypeType> bestSecondParentGenotype = new ArrayList<GenotypeType>();
|
||||
ArrayList<GenotypeType> bestChildGenotype = new ArrayList<GenotypeType>();
|
||||
GenotypeType pairSecondParentGenotype = null;
|
||||
if(mother == null || !mother.isCalled()){
|
||||
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
||||
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
||||
bestFirstParentGenotype.add(getTypeSafeNull(father));
|
||||
bestSecondParentGenotype.add(getTypeSafeNull(mother));
|
||||
pairSecondParentGenotype = mother == null ? Genotype.Type.UNAVAILABLE : mother.getType();
|
||||
pairSecondParentGenotype = mother == null ? GenotypeType.UNAVAILABLE : mother.getType();
|
||||
if(father != null && father.isCalled())
|
||||
parentsCalled = 1;
|
||||
}
|
||||
|
|
@ -583,12 +593,12 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
bestSecondParentGenotype.add(getTypeSafeNull(father));
|
||||
if(father == null || !father.isCalled()){
|
||||
parentsCalled = 1;
|
||||
pairSecondParentGenotype = father == null ? Genotype.Type.UNAVAILABLE : father.getType();
|
||||
pairSecondParentGenotype = father == null ? GenotypeType.UNAVAILABLE : father.getType();
|
||||
}else{
|
||||
parentsCalled = 2;
|
||||
}
|
||||
}
|
||||
Map<Genotype.Type,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||
Map<GenotypeType,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||
bestChildGenotype.add(getTypeSafeNull(child));
|
||||
|
||||
//Prior vars
|
||||
|
|
@ -604,9 +614,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
int mvCount;
|
||||
int cumulativeMVCount = 0;
|
||||
double configurationLikelihood = 0;
|
||||
for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){
|
||||
for(Map.Entry<Genotype.Type,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<Genotype.Type,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> childGenotype : childLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
|
||||
mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
|
||||
//For parent/child pairs, sum over the possible genotype configurations of the missing parent
|
||||
if(parentsCalled<2){
|
||||
|
|
|
|||
|
|
@ -109,14 +109,13 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
|
||||
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
|
||||
|
||||
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
|
||||
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
|
||||
if (phaseQual.PQ != null)
|
||||
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
||||
|
||||
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||
Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).GQ(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make();
|
||||
mergedGenotypes.add(mergedGt);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -288,7 +288,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
|
|
@ -374,7 +374,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
if (isUnfilteredCalledDiploidGenotype(gt)) {
|
||||
if (gt.isHom()) { // Note that this Genotype may be replaced later to contain the PQ of a downstream het site that was phased relative to a het site lying upstream of this hom site:
|
||||
// true <-> can trivially phase a hom site relative to ANY previous site:
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), gt.getAlleles(), gt.getLog10PError(), gt.getFilters(), gt.getAttributes(), true);
|
||||
Genotype phasedGt = new GenotypeBuilder(gt).phased(true).make();
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
else if (gt.isHet()) { // Attempt to phase this het genotype relative to the previous het genotype
|
||||
|
|
@ -408,9 +408,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
if (DEBUG) logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
|
||||
|
||||
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
|
||||
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||
gtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased);
|
||||
Genotype phasedGt = new GenotypeBuilder(gt)
|
||||
.alleles(allelePair.getAllelesAsList())
|
||||
.attribute(PQ_KEY, pr.phaseQuality)
|
||||
.phased(genotypesArePhased).make();
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
|
||||
|
|
@ -428,9 +429,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
interiorUvc.setPhasingInconsistent();
|
||||
|
||||
if (genotypesArePhased) {
|
||||
Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes());
|
||||
handledGtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased);
|
||||
Genotype phasedHomGt = new GenotypeBuilder(handledGt)
|
||||
.attribute(PQ_KEY, pr.phaseQuality)
|
||||
.phased(genotypesArePhased).make();
|
||||
interiorUvc.setGenotype(samp, phasedHomGt);
|
||||
}
|
||||
}
|
||||
|
|
@ -1439,7 +1440,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
|
||||
public static boolean isUnfilteredCalledDiploidGenotype(Genotype gt) {
|
||||
return (gt.isNotFiltered() && gt.isCalled() && gt.getPloidy() == 2);
|
||||
return (! gt.isFiltered() && gt.isCalled() && gt.getPloidy() == 2);
|
||||
}
|
||||
|
||||
private class MultipleBaseCountsWriter {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
return true;
|
||||
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
|
||||
// first subset to the samples
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, true);
|
||||
|
||||
// now check to see (using EXACT model) whether this should be variant
|
||||
// do we want to apply a prior? maybe user-spec?
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
|
|||
if ( samples == null || samples.isEmpty() )
|
||||
return true;
|
||||
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, false);
|
||||
if ( subContext.isPolymorphicInSamples() ) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -54,7 +55,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
* Initialize this object
|
||||
*/
|
||||
public GenotypeConcordance() {
|
||||
final int nGenotypeTypes = Genotype.Type.values().length;
|
||||
final int nGenotypeTypes = GenotypeType.values().length;
|
||||
truthByCalledGenotypeCounts = new long[nGenotypeTypes][nGenotypeTypes];
|
||||
}
|
||||
|
||||
|
|
@ -75,11 +76,11 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
if (eval != null) {
|
||||
for (final Genotype g : eval.getGenotypes() ) {
|
||||
final String sample = g.getSampleName();
|
||||
final Genotype.Type called = g.getType();
|
||||
final Genotype.Type truth;
|
||||
final GenotypeType called = g.getType();
|
||||
final GenotypeType truth;
|
||||
|
||||
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
|
||||
truth = Genotype.Type.NO_CALL;
|
||||
truth = GenotypeType.NO_CALL;
|
||||
} else {
|
||||
truth = validation.getGenotype(sample).getType();
|
||||
}
|
||||
|
|
@ -90,19 +91,19 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
// otherwise, mark no-calls for all samples
|
||||
else {
|
||||
final Genotype.Type called = Genotype.Type.NO_CALL;
|
||||
final GenotypeType called = GenotypeType.NO_CALL;
|
||||
|
||||
for (final Genotype g : validation.getGenotypes()) {
|
||||
final Genotype.Type truth = g.getType();
|
||||
final GenotypeType truth = g.getType();
|
||||
incrValue(truth, called);
|
||||
|
||||
// print out interesting sites
|
||||
/*
|
||||
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
|
||||
if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) {
|
||||
if ( (truth == GenotypeType.HOM_VAR || truth == GenotypeType.HET) && called == GenotypeType.NO_CALL ) {
|
||||
super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation);
|
||||
}
|
||||
if ( (called == Genotype.Type.HOM_VAR || called == Genotype.Type.HET) && truth == Genotype.Type.HOM_REF ) {
|
||||
if ( (called == GenotypeType.HOM_VAR || called == GenotypeType.HET) && truth == GenotypeType.HOM_REF ) {
|
||||
super.getVEWalker().gcLog.printf("%s FP %s%n", group, validation);
|
||||
}
|
||||
}
|
||||
|
|
@ -121,36 +122,36 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
* @param truth the truth type
|
||||
* @param called the called type
|
||||
*/
|
||||
private void incrValue(final Genotype.Type truth, final Genotype.Type called) {
|
||||
private void incrValue(final GenotypeType truth, final GenotypeType called) {
|
||||
truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]++;
|
||||
}
|
||||
|
||||
private long count(final Genotype.Type truth, final Genotype.Type called) {
|
||||
private long count(final GenotypeType truth, final GenotypeType called) {
|
||||
return truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()];
|
||||
}
|
||||
|
||||
private long count(final EnumSet<Genotype.Type> truth, final Genotype.Type called) {
|
||||
private long count(final EnumSet<GenotypeType> truth, final GenotypeType called) {
|
||||
return count(truth, EnumSet.of(called));
|
||||
}
|
||||
|
||||
private long count(final Genotype.Type truth, final EnumSet<Genotype.Type> called) {
|
||||
private long count(final GenotypeType truth, final EnumSet<GenotypeType> called) {
|
||||
return count(EnumSet.of(truth), called);
|
||||
}
|
||||
|
||||
private long count(final EnumSet<Genotype.Type> truth, final EnumSet<Genotype.Type> called) {
|
||||
private long count(final EnumSet<GenotypeType> truth, final EnumSet<GenotypeType> called) {
|
||||
long sum = 0;
|
||||
for ( final Genotype.Type truth1 : truth ) {
|
||||
for ( final Genotype.Type called1 : called ) {
|
||||
for ( final GenotypeType truth1 : truth ) {
|
||||
for ( final GenotypeType called1 : called ) {
|
||||
sum += count(truth1, called1);
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
private long countDiag( final EnumSet<Genotype.Type> d1 ) {
|
||||
private long countDiag( final EnumSet<GenotypeType> d1 ) {
|
||||
long sum = 0;
|
||||
|
||||
for(final Genotype.Type e1 : d1 ) {
|
||||
for(final GenotypeType e1 : d1 ) {
|
||||
sum += truthByCalledGenotypeCounts[e1.ordinal()][e1.ordinal()];
|
||||
}
|
||||
|
||||
|
|
@ -159,13 +160,13 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
final EnumSet<Genotype.Type> allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET);
|
||||
final EnumSet<Genotype.Type> allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF);
|
||||
final EnumSet<Genotype.Type> allGenotypes = EnumSet.allOf(Genotype.Type.class);
|
||||
final EnumSet<GenotypeType> allVariantGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET);
|
||||
final EnumSet<GenotypeType> allCalledGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET, GenotypeType.HOM_REF);
|
||||
final EnumSet<GenotypeType> allGenotypes = EnumSet.allOf(GenotypeType.class);
|
||||
|
||||
// exact values of the table
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType truth : GenotypeType.values() ) {
|
||||
for ( final GenotypeType called : GenotypeType.values() ) {
|
||||
final String field = String.format("n_true_%s_called_%s", truth, called);
|
||||
final Long value = count(truth, called);
|
||||
map.put(field, value.toString());
|
||||
|
|
@ -173,20 +174,20 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
|
||||
// counts of called genotypes
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType called : GenotypeType.values() ) {
|
||||
final String field = String.format("total_called_%s", called);
|
||||
final Long value = count(allGenotypes, called);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
// counts of true genotypes
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType truth : GenotypeType.values() ) {
|
||||
final String field = String.format("total_true_%s", truth);
|
||||
final Long value = count(truth, allGenotypes);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
for ( final Genotype.Type genotype : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType genotype : GenotypeType.values() ) {
|
||||
final String field = String.format("percent_%s_called_%s", genotype, genotype);
|
||||
long numer = count(genotype, genotype);
|
||||
long denom = count(EnumSet.of(genotype), allGenotypes);
|
||||
|
|
@ -215,7 +216,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
// overall genotype concordance of sites called non-ref in eval track
|
||||
// MAD: this is the non-reference discrepancy rate
|
||||
final String field = "percent_non_reference_discrepancy_rate";
|
||||
long homrefConcords = count(Genotype.Type.HOM_REF, Genotype.Type.HOM_REF);
|
||||
long homrefConcords = count(GenotypeType.HOM_REF, GenotypeType.HOM_REF);
|
||||
long allNoHomRef = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
long numer = allNoHomRef - countDiag(allVariantGenotypes);
|
||||
long denom = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles());
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, false);
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
|
||||
|
||||
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
|
||||
|
|
|
|||
|
|
@ -223,7 +223,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
newA = Allele.NO_CALL;
|
||||
newAlleles.add(newA);
|
||||
}
|
||||
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
|
||||
|
|
|
|||
|
|
@ -538,7 +538,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (!selectedTypes.contains(vc.getType()))
|
||||
continue;
|
||||
|
||||
VariantContext sub = subsetRecord(vc, samples, EXCLUDE_NON_VARIANTS);
|
||||
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
|
||||
|
||||
if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull());
|
||||
|
|
@ -687,18 +687,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
|
||||
*
|
||||
* @param vc the VariantContext record to subset
|
||||
* @param samples the samples to extract
|
||||
* @return the subsetted VariantContext
|
||||
*/
|
||||
private VariantContext subsetRecord(final VariantContext vc, final Set<String> samples, final boolean excludeNonVariants) {
|
||||
if ( samples == null || samples.isEmpty() )
|
||||
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
|
||||
if ( NO_SAMPLES_SPECIFIED || samples.isEmpty() )
|
||||
return vc;
|
||||
|
||||
final VariantContext sub;
|
||||
if ( excludeNonVariants )
|
||||
sub = vc.subContextFromSamples(samples); // strip out the alternate alleles that aren't being used
|
||||
else
|
||||
sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||
|
||||
GenotypesContext newGC = sub.getGenotypes();
|
||||
|
|
@ -708,15 +704,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
|
||||
//Remove a fraction of the genotypes if needed
|
||||
if(fractionGenotypes>0){
|
||||
if ( fractionGenotypes > 0 ){
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).GQ(-1).make());
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
|
|
@ -750,14 +744,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
for (String sample : originalVC.getSampleNames()) {
|
||||
Genotype g = originalVC.getGenotype(sample);
|
||||
|
||||
if ( g.isNotFiltered() ) {
|
||||
|
||||
String dp = (String) g.getAttribute("DP");
|
||||
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
depth += Integer.valueOf(dp);
|
||||
}
|
||||
if ( ! g.isFiltered() ) {
|
||||
if ( g.hasDP() )
|
||||
depth += g.getDP();
|
||||
}
|
||||
}
|
||||
|
||||
builder.attribute("DP", depth);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// set the appropriate sample name if necessary
|
||||
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
|
||||
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
|
||||
Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
|
||||
builder.genotypes(g);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -30,7 +31,7 @@ public class MendelianViolation {
|
|||
private boolean allCalledOnly = true;
|
||||
|
||||
//Stores occurrences of inheritance
|
||||
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
|
||||
private EnumMap<GenotypeType, EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> inheritance;
|
||||
|
||||
private int violations_total=0;
|
||||
|
||||
|
|
@ -74,119 +75,119 @@ public class MendelianViolation {
|
|||
|
||||
//Count of HomRef/HomRef/HomRef trios
|
||||
public int getRefRefRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of HomVar/HomVar/HomVar trios
|
||||
public int getVarVarVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of HomRef/HomVar/Het trios
|
||||
public int getRefVarHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
|
||||
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HET) +
|
||||
inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/Het trios
|
||||
public int getHetHetHet(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomRef trios
|
||||
public int getHetHetHomRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomVar trios
|
||||
public int getHetHetHomVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
//return parentsHetHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
|
||||
//return parentsHetHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
//return parentsHomRefHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
//return parentsHomRefHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
|
||||
//return parentsHomVarHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
|
||||
//return parentsHomVarHet_childVar;
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
|
||||
public int getParentsRefRefChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HET
|
||||
public int getParentsRefRefChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HET -> HOM_VAR
|
||||
public int getParentsRefHetChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
|
||||
public int getParentsRefVarChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
|
||||
public int getParentsRefVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HET -> HOM_REF
|
||||
public int getParentsVarHetChildRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
|
||||
public int getParentsVarVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
|
||||
public int getParentsVarVarChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -362,12 +363,12 @@ public class MendelianViolation {
|
|||
|
||||
private void createInheritanceMap(){
|
||||
|
||||
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
inheritance = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
for(GenotypeType mType : GenotypeType.values()){
|
||||
inheritance.put(mType, new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||
for(GenotypeType dType : GenotypeType.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<GenotypeType,Integer>(GenotypeType.class));
|
||||
for(GenotypeType cType : GenotypeType.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
|
|
@ -376,9 +377,9 @@ public class MendelianViolation {
|
|||
}
|
||||
|
||||
private void clearInheritanceMap(){
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
for(GenotypeType mType : GenotypeType.values()){
|
||||
for(GenotypeType dType : GenotypeType.values()){
|
||||
for(GenotypeType cType : GenotypeType.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -223,6 +223,19 @@ public class Utils {
|
|||
return ret.toString();
|
||||
}
|
||||
|
||||
public static String join(String separator, int[] ints) {
|
||||
if ( ints == null || ints.length == 0)
|
||||
return "";
|
||||
else {
|
||||
StringBuilder ret = new StringBuilder(ints[0]);
|
||||
for (int i = 1; i < ints.length; ++i) {
|
||||
ret.append(separator);
|
||||
ret.append(ints[i]);
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
|
||||
* elti objects (note there's no actual space between sep and the elti elements). Returns
|
||||
|
|
|
|||
|
|
@ -314,7 +314,9 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
final LazyGenotypesContext.LazyParser lazyParser =
|
||||
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields);
|
||||
final int nGenotypes = header.getGenotypeSamples().size();
|
||||
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, decoder.getRecordBytes(), nGenotypes);
|
||||
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser,
|
||||
new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()),
|
||||
nGenotypes);
|
||||
|
||||
// did we resort the sample names? If so, we need to load the genotype data
|
||||
if ( !header.samplesWereAlreadySorted() )
|
||||
|
|
@ -324,6 +326,16 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
}
|
||||
}
|
||||
|
||||
public static class LazyData {
|
||||
final public int nGenotypeFields;
|
||||
final public byte[] bytes;
|
||||
|
||||
public LazyData(final int nGenotypeFields, final byte[] bytes) {
|
||||
this.nGenotypeFields = nGenotypeFields;
|
||||
this.bytes = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
private final String getDictionaryString() {
|
||||
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,6 +58,23 @@ public class BCF2Encoder {
|
|||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method for writing raw bytes to the encoder stream
|
||||
*
|
||||
* The purpuse this method exists is to allow lazy decoding of genotype data. In that
|
||||
* situation the reader has loaded a block of bytes, and never decoded it, so we
|
||||
* are just writing it back out immediately as a raw stream of blocks. Any
|
||||
* bad low-level formatting or changes to that byte[] will result in a malformed
|
||||
* BCF2 block.
|
||||
*
|
||||
* @param bytes a non-null byte array
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeRawBytes(final byte[] bytes) throws IOException {
|
||||
assert bytes != null;
|
||||
encodeStream.write(bytes);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Writing typed values (have type byte)
|
||||
|
|
|
|||
|
|
@ -56,79 +56,8 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
|||
this.nFields = nFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||
logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||
|
||||
// load our bytep[] data into the decoder
|
||||
final BCF2Decoder decoder = new BCF2Decoder((byte[])data);
|
||||
|
||||
// go ahead and decode everyone
|
||||
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(decoder, nFields, nSamples);
|
||||
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
// all of the information we need for each genotype, with default values
|
||||
final String sampleName = samples.get(i);
|
||||
List<Allele> alleles = null;
|
||||
boolean isPhased = false;
|
||||
double log10PError = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> filters = null;
|
||||
Map<String, Object> attributes = null;
|
||||
double[] log10Likelihoods = null;
|
||||
|
||||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||
final String field = entry.getKey();
|
||||
Object value = entry.getValue().get(i);
|
||||
try {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
alleles = decodeGenotypeAlleles(siteAlleles, (List<Integer>)value);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
log10PError = ((Integer)value) / -10.0;
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
final List<Integer> pls = (List<Integer>)value;
|
||||
if ( pls != null ) { // we have a PL field
|
||||
log10Likelihoods = new double[pls.size()];
|
||||
for ( int j = 0; j < log10Likelihoods.length; j++ ) {
|
||||
final double d = pls.get(j);
|
||||
log10Likelihoods[j] = d == -0.0 ? 0.0 : d / -10.0;
|
||||
}
|
||||
}
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
//filters = new HashSet<String>(values.get(i));
|
||||
} else { // add to attributes
|
||||
if ( value != null ) { // don't add missing values
|
||||
if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
|
||||
if ( value instanceof List && ((List)value).size() == 1)
|
||||
value = ((List)value).get(0);
|
||||
attributes.put(field, value);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value in the "
|
||||
+ " BCF file. Value was " + value);
|
||||
}
|
||||
}
|
||||
|
||||
if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
|
||||
|
||||
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
||||
}
|
||||
|
||||
// public LazyGenotypesContext.LazyData parse2(final Object data) {
|
||||
// @Override
|
||||
// public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||
// logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||
//
|
||||
// // load our bytep[] data into the decoder
|
||||
|
|
@ -144,40 +73,43 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
|||
//
|
||||
// final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(decoder, nFields, nSamples);
|
||||
// final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
// final GenotypeBuilder gb = new GenotypeBuilder();
|
||||
// for ( int i = 0; i < nSamples; i++ ) {
|
||||
// // all of the information we need for each genotype, with default values
|
||||
// gb.reset();
|
||||
// gb.name(samples.get(i));
|
||||
// final String sampleName = samples.get(i);
|
||||
// List<Allele> alleles = null;
|
||||
// boolean isPhased = false;
|
||||
// double log10PError = VariantContext.NO_LOG10_PERROR;
|
||||
// Set<String> filters = null;
|
||||
// Map<String, Object> attributes = null;
|
||||
// double[] log10Likelihoods = null;
|
||||
//
|
||||
// for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||
// final String field = entry.getKey();
|
||||
// Object value = entry.getValue().get(i);
|
||||
// try {
|
||||
// if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
// gb.alleles(decodeGenotypeAlleles(siteAlleles, (List<Integer>)value));
|
||||
// } else if ( field.equals(VCFConstants.DEPTH_KEY) ) {
|
||||
// if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
// gb.DP((Integer)value);
|
||||
// alleles = decodeGenotypeAlleles(siteAlleles, (List<Integer>)value);
|
||||
// } else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
// if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
// gb.GQ((Integer)value);
|
||||
// log10PError = ((Integer)value) / -10.0;
|
||||
// } else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
// final int[] PLs = decodeIntArray(value);
|
||||
// if ( PLs != null )
|
||||
// gb.PL(PLs);
|
||||
// } else if ( field.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS) ) {
|
||||
// final int[] AD = decodeIntArray(value);
|
||||
// if ( AD != null )
|
||||
// gb.AD(AD);
|
||||
// final List<Integer> pls = (List<Integer>)value;
|
||||
// if ( pls != null ) { // we have a PL field
|
||||
// log10Likelihoods = new double[pls.size()];
|
||||
// for ( int j = 0; j < log10Likelihoods.length; j++ ) {
|
||||
// final double d = pls.get(j);
|
||||
// log10Likelihoods[j] = d == -0.0 ? 0.0 : d / -10.0;
|
||||
// }
|
||||
// }
|
||||
// } else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
// throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
// //filters = new HashSet<String>(values.get(i));
|
||||
// } else { // add to attributes
|
||||
// if ( value != null ) { // don't add missing values
|
||||
// if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
|
||||
// if ( value instanceof List && ((List)value).size() == 1)
|
||||
// value = ((List)value).get(0);
|
||||
// gb.attribute(field, value);
|
||||
// attributes.put(field, value);
|
||||
// }
|
||||
// }
|
||||
// } catch ( ClassCastException e ) {
|
||||
|
|
@ -187,13 +119,82 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
|||
// }
|
||||
// }
|
||||
//
|
||||
// final Genotype g = gb.make();
|
||||
// if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
|
||||
//
|
||||
// final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
||||
// genotypes.add(g);
|
||||
// }
|
||||
//
|
||||
// return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
||||
// }
|
||||
|
||||
@Override
|
||||
public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||
logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||
|
||||
// load our bytep[] data into the decoder
|
||||
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
||||
|
||||
// go ahead and decode everyone
|
||||
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(decoder, nFields, nSamples);
|
||||
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
final GenotypeBuilder gb = new GenotypeBuilder();
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
// all of the information we need for each genotype, with default values
|
||||
gb.reset();
|
||||
gb.name(samples.get(i));
|
||||
|
||||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||
final String field = entry.getKey();
|
||||
Object value = entry.getValue().get(i);
|
||||
try {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
gb.alleles(decodeGenotypeAlleles(siteAlleles, (List<Integer>)value));
|
||||
} else if ( field.equals(VCFConstants.DEPTH_KEY) ) {
|
||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
gb.DP((Integer)value);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
gb.GQ((Integer)value);
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
final int[] PLs = decodeIntArray(value);
|
||||
if ( PLs != null )
|
||||
gb.PL(PLs);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS) ) {
|
||||
final int[] AD = decodeIntArray(value);
|
||||
if ( AD != null )
|
||||
gb.AD(AD);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
//filters = new HashSet<String>(values.get(i));
|
||||
} else { // add to attributes
|
||||
if ( value != null ) { // don't add missing values
|
||||
if ( value instanceof List && ((List)value).size() == 1)
|
||||
value = ((List)value).get(0);
|
||||
gb.attribute(field, value);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value in the "
|
||||
+ " BCF file. Value was " + value);
|
||||
}
|
||||
}
|
||||
|
||||
final Genotype g = gb.make();
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
||||
}
|
||||
|
||||
private final int[] decodeIntArray(final Object value) {
|
||||
// todo -- decode directly into int[]
|
||||
final List<Integer> pls = (List<Integer>)value;
|
||||
|
|
@ -242,10 +243,15 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
|||
for ( int i = 0; i < nFields; i++ ) {
|
||||
final int offset = (Integer) decoder.decodeTypedValue();
|
||||
final String field = codec.getDictionaryString(offset);
|
||||
|
||||
// the type of each element
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final List<Object> values = new ArrayList<Object>(nSamples);
|
||||
for ( int j = 0; j < nSamples; j++ )
|
||||
values.add(decoder.decodeTypedValue(typeDescriptor));
|
||||
|
||||
assert ! map.containsKey(field);
|
||||
|
||||
map.put(field, values);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
|
|
@ -33,9 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Common utilities for working with BCF2 files
|
||||
|
|
@ -77,11 +76,16 @@ public class BCF2Utils {
|
|||
* The dictionary is an ordered list of common VCF identifers (FILTER, INFO, and FORMAT)
|
||||
* fields.
|
||||
*
|
||||
* Note that its critical that the list be dedupped and sorted in a consistent manner each time,
|
||||
* as the BCF2 offsets are encoded relative to this dictionary, and if it isn't determined exactly
|
||||
* the same way as in the header each time it's very bad
|
||||
*
|
||||
* @param header the VCFHeader from which to build the dictionary
|
||||
* @return a non-null dictionary of elements, may be empty
|
||||
*/
|
||||
@Ensures("new HashSet(result).size() == result.size()")
|
||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
final ArrayList<String> dict = new ArrayList<String>();
|
||||
final Set<String> dict = new TreeSet<String>();
|
||||
|
||||
// set up the strings dictionary
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
|
|
@ -92,7 +96,7 @@ public class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
return dict;
|
||||
return new ArrayList<String>(dict);
|
||||
}
|
||||
|
||||
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
protected final String[] locParts = new String[6];
|
||||
|
||||
// for performance we cache the hashmap of filter encodings for quick lookup
|
||||
protected HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>();
|
||||
protected HashMap<String,List<String>> filterHash = new HashMap<String,List<String>>();
|
||||
|
||||
// we store a name to give to each of the variant contexts we emit
|
||||
protected String name = "Unknown";
|
||||
|
|
@ -108,7 +108,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied
|
||||
*/
|
||||
protected abstract Set<String> parseFilters(String filterString);
|
||||
protected abstract List<String> parseFilters(String filterString);
|
||||
|
||||
/**
|
||||
* create a VCF header from a set of header record lines
|
||||
|
|
@ -320,7 +320,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
String ref = getCachedString(parts[3].toUpperCase());
|
||||
String alts = getCachedString(parts[4].toUpperCase());
|
||||
builder.log10PError(parseQual(parts[5]));
|
||||
builder.filters(parseFilters(getCachedString(parts[6])));
|
||||
|
||||
final List<String> filters = parseFilters(getCachedString(parts[6]));
|
||||
if ( filters != null ) builder.filters(new HashSet<String>(filters));
|
||||
final Map<String, Object> attrs = parseInfo(parts[7]);
|
||||
builder.attributes(attrs);
|
||||
|
||||
|
|
|
|||
|
|
@ -78,24 +78,24 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied
|
||||
*/
|
||||
protected Set<String> parseFilters(String filterString) {
|
||||
protected List<String> parseFilters(String filterString) {
|
||||
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
List<String> fFields = new ArrayList<String>();
|
||||
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
return fFields;
|
||||
return new ArrayList<String>(fFields);
|
||||
|
||||
if ( filterString.length() == 0 )
|
||||
generateException("The VCF specification requires a valid filter status");
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
return new ArrayList<String>(filterHash.get(filterString));
|
||||
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
|
|
@ -141,7 +141,7 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
double GTQual = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> genotypeFilters = null;
|
||||
List<String> genotypeFilters = null;
|
||||
Map<String, Object> gtAttributes = null;
|
||||
String sampleName = sampleNameIterator.next();
|
||||
|
||||
|
|
@ -181,12 +181,10 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(new Genotype(sampleName,
|
||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
final Genotype g = new GenotypeBuilder(sampleName)
|
||||
.alleles(parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap))
|
||||
.GQ(GTQual).filters(genotypeFilters).attributes(gtAttributes).phased(phased).make();
|
||||
genotypes.add(g);
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -127,38 +127,33 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
|
||||
*/
|
||||
protected Set<String> parseFilters(String filterString) {
|
||||
return parseFilters(filterHash, lineNo, filterString);
|
||||
}
|
||||
|
||||
public static Set<String> parseFilters(final Map<String, LinkedHashSet<String>> cache, final int lineNo, final String filterString) {
|
||||
protected List<String> parseFilters(String filterString) {
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
|
||||
return Collections.emptySet();
|
||||
return Collections.emptyList();
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
|
||||
if ( filterString.length() == 0 )
|
||||
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( cache != null && cache.containsKey(filterString) )
|
||||
return Collections.unmodifiableSet(cache.get(filterString));
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
List<String> fFields = new LinkedList<String>();
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
fFields.add(filterString);
|
||||
else
|
||||
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
|
||||
|
||||
fFields = fFields;
|
||||
if ( cache != null ) cache.put(filterString, fFields);
|
||||
filterHash.put(filterString, Collections.unmodifiableList(fFields));
|
||||
|
||||
return Collections.unmodifiableSet(fFields);
|
||||
return fFields;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -192,10 +187,8 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
double GTQual = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> genotypeFilters = null;
|
||||
Map<String, Object> gtAttributes = null;
|
||||
String sampleName = sampleNameIterator.next();
|
||||
final String sampleName = sampleNameIterator.next();
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleName);
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
|
|
@ -203,23 +196,34 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gtAttributes = new HashMap<String, Object>(nGTKeys - 1);
|
||||
gb.maxAttributes(nGTKeys - 1);
|
||||
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
final String gtKey = new String(genotypeKeyArray[i]);
|
||||
final String gtKey = genotypeKeyArray[i];
|
||||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
genotypeFilters = missing ? parseFilters(VCFConstants.MISSING_VALUE_v4) : parseFilters(getCachedString(GTValueArray[i]));
|
||||
final List<String> filters = parseFilters(getCachedString(GTValueArray[i]));
|
||||
if ( filters != null ) gb.filters(filters);
|
||||
} else if ( missing ) {
|
||||
// if its truly missing (there no provided value) skip adding it to the attributes
|
||||
} else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// don't add missing values to the map
|
||||
} else {
|
||||
gtAttributes.put(gtKey, GTValueArray[i]);
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
gb.GQ(Double.valueOf(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
gb.AD(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
gb.PL(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.DEPTH_KEY)) {
|
||||
gb.DP(Integer.valueOf(GTValueArray[i]));
|
||||
} else {
|
||||
gb.attribute(gtKey, GTValueArray[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -230,12 +234,13 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||
|
||||
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
final List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
gb.alleles(GTalleles);
|
||||
gb.phased(genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1);
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(new Genotype(sampleName, GTalleles, GTQual, genotypeFilters, gtAttributes, phased));
|
||||
genotypes.add(gb.make());
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
|
|
@ -244,6 +249,14 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
return new LazyGenotypesContext.LazyData(genotypes, header.getSampleNamesInOrder(), header.getSampleNameToOffset());
|
||||
}
|
||||
|
||||
private final static int[] decodeInts(final String string) {
|
||||
final String[] splits = string.split(",");
|
||||
final int[] values = new int[splits.length];
|
||||
for ( int i = 0; i < splits.length; i++ )
|
||||
values[i] = Integer.valueOf(splits[i]);
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canDecode(final String potentialInput) {
|
||||
return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER);
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ import java.util.*;
|
|||
* @author Mark DePristo
|
||||
* @since 05/12
|
||||
*/
|
||||
public final class FastGenotype implements Comparable<FastGenotype> {
|
||||
public final class FastGenotype implements Genotype {
|
||||
private final String sampleName;
|
||||
private final List<Allele> alleles;
|
||||
private final boolean isPhased;
|
||||
|
|
@ -87,7 +87,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
private final int[] PL;
|
||||
private final Map<String, Object> extendedAttributes;
|
||||
|
||||
private Type type = null;
|
||||
private GenotypeType type = null;
|
||||
|
||||
/**
|
||||
* The only way to make one of these, for use by GenotypeBuilder only
|
||||
|
|
@ -168,7 +168,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
@Requires("i >=0 && i < getPloidy()")
|
||||
@Ensures("result != null")
|
||||
public Allele getAllele(int i) {
|
||||
if ( getType() == Type.UNAVAILABLE )
|
||||
if ( getType() == GenotypeType.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
|
@ -229,26 +229,11 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public enum Type {
|
||||
/** The sample is no-called (all alleles are NO_CALL */
|
||||
NO_CALL,
|
||||
/** The sample is homozygous reference */
|
||||
HOM_REF,
|
||||
/** The sample is heterozygous, with at least one ref and at least one one alt in any order */
|
||||
HET,
|
||||
/** All alleles are non-reference */
|
||||
HOM_VAR,
|
||||
/** There is no allele data availble for this sample (alleles.isEmpty) */
|
||||
UNAVAILABLE,
|
||||
/** Some chromosomes are NO_CALL and others are called */
|
||||
MIXED // no-call and call in the same genotype
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the high-level type of this sample's genotype
|
||||
*/
|
||||
@Ensures({"type != null", "result != null"})
|
||||
public Type getType() {
|
||||
public GenotypeType getType() {
|
||||
if ( type == null ) {
|
||||
type = determineType();
|
||||
}
|
||||
|
|
@ -259,10 +244,10 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
* Internal code to determine the type of the genotype from the alleles vector
|
||||
* @return the type
|
||||
*/
|
||||
protected Type determineType() {
|
||||
protected GenotypeType determineType() {
|
||||
// TODO -- this code is slow and could be optimized for the diploid case
|
||||
if ( alleles.isEmpty() )
|
||||
return Type.UNAVAILABLE;
|
||||
return GenotypeType.UNAVAILABLE;
|
||||
|
||||
boolean sawNoCall = false, sawMultipleAlleles = false;
|
||||
Allele observedAllele = null;
|
||||
|
|
@ -278,14 +263,14 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
|
||||
if ( sawNoCall ) {
|
||||
if ( observedAllele == null )
|
||||
return Type.NO_CALL;
|
||||
return Type.MIXED;
|
||||
return GenotypeType.NO_CALL;
|
||||
return GenotypeType.MIXED;
|
||||
}
|
||||
|
||||
if ( observedAllele == null )
|
||||
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
|
||||
|
||||
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
||||
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -296,37 +281,37 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
/**
|
||||
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomRef() { return getType() == Type.HOM_REF; }
|
||||
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
|
||||
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
|
||||
|
||||
/**
|
||||
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHet() { return getType() == Type.HET; }
|
||||
public boolean isHet() { return getType() == GenotypeType.HET; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
|
||||
*/
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of both calls and no-calls.
|
||||
*/
|
||||
public boolean isMixed() { return getType() == Type.MIXED; }
|
||||
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
|
||||
|
||||
/**
|
||||
* @return true if the type of this genotype is set.
|
||||
*/
|
||||
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -450,15 +435,15 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(final FastGenotype genotype) {
|
||||
public int compareTo(final Genotype genotype) {
|
||||
return getSampleName().compareTo(genotype.getSampleName());
|
||||
}
|
||||
|
||||
public boolean sameGenotype(final FastGenotype other) {
|
||||
public boolean sameGenotype(final Genotype other) {
|
||||
return sameGenotype(other, true);
|
||||
}
|
||||
|
||||
public boolean sameGenotype(final FastGenotype other, boolean ignorePhase) {
|
||||
public boolean sameGenotype(final Genotype other, boolean ignorePhase) {
|
||||
if (getPloidy() != other.getPloidy())
|
||||
return false; // gotta have the same number of allele to be equal
|
||||
|
||||
|
|
@ -515,6 +500,11 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
return hasAttribute(key) ? extendedAttributes.get(key) : defaultValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getAttribute(final String key) {
|
||||
return getAttribute(key, null);
|
||||
}
|
||||
|
||||
// TODO -- add getAttributesAsX interface here
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
|
|
@ -584,7 +574,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
* manage inline in the Genotype object. They must not appear in the
|
||||
* extended attributes map
|
||||
*/
|
||||
private final static Collection<String> FORBIDDEN_KEYS = Arrays.asList(
|
||||
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
|
||||
VCFConstants.GENOTYPE_KEY,
|
||||
VCFConstants.GENOTYPE_QUALITY_KEY,
|
||||
VCFConstants.DEPTH_KEY,
|
||||
|
|
@ -599,7 +589,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
* @return
|
||||
*/
|
||||
private final static boolean hasForbiddenKey(final Map<String, Object> attributes) {
|
||||
for ( final String forbidden : FORBIDDEN_KEYS )
|
||||
for ( final String forbidden : PRIMARY_KEYS)
|
||||
if ( attributes.containsKey(forbidden) )
|
||||
return true;
|
||||
return false;
|
||||
|
|
@ -617,4 +607,44 @@ public final class FastGenotype implements Comparable<FastGenotype> {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override public boolean hasPL() { return PL != null; }
|
||||
@Override public boolean hasAD() { return AD != null; }
|
||||
@Override public boolean hasGQ() { return GQ != -1; }
|
||||
@Override public boolean hasDP() { return DP != -1; }
|
||||
|
||||
// TODO -- remove me
|
||||
@Override public List<String> getFilters() {
|
||||
return (List<String>)getAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
|
||||
}
|
||||
@Override public boolean isFiltered() { return false; }
|
||||
@Override public boolean filtersWereApplied() { return false; }
|
||||
@Override public boolean hasLog10PError() { return hasGQ(); }
|
||||
@Override public double getLog10PError() { return getGQ() / -10.0; }
|
||||
|
||||
@Override public int getPhredScaledQual() { return getGQ(); }
|
||||
|
||||
@Override
|
||||
public String getAttributeAsString(String key, String defaultValue) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x == null ) return defaultValue;
|
||||
if ( x instanceof String ) return (String)x;
|
||||
return String.valueOf(x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getAttributeAsInt(String key, int defaultValue) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
|
||||
if ( x instanceof Integer ) return (Integer)x;
|
||||
return Integer.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getAttributeAsDouble(String key, double defaultValue) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x == null ) return defaultValue;
|
||||
if ( x instanceof Double ) return (Double)x;
|
||||
return Double.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
}
|
||||
|
|
@ -12,366 +12,99 @@ import java.util.*;
|
|||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class Genotype implements Comparable<Genotype> {
|
||||
public interface Genotype extends Comparable<Genotype> {
|
||||
List<Allele> getAlleles();
|
||||
|
||||
public final static String PHASED_ALLELE_SEPARATOR = "|";
|
||||
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
|
||||
int countAllele(final Allele allele);
|
||||
|
||||
protected CommonInfo commonInfo;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
||||
protected Type type = null;
|
||||
Allele getAllele(int i);
|
||||
|
||||
protected boolean isPhased = false;
|
||||
boolean isPhased();
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased) {
|
||||
this(sampleName, alleles, log10PError, filters, attributes, isPhased, null);
|
||||
}
|
||||
int getPloidy();
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||
if ( alleles == null || alleles.isEmpty() )
|
||||
this.alleles = Collections.emptyList();
|
||||
else
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
|
||||
if ( log10Likelihoods != null )
|
||||
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||
this.isPhased = isPhased;
|
||||
validate();
|
||||
}
|
||||
GenotypeType getType();
|
||||
|
||||
/**
|
||||
* Creates a new Genotype for sampleName with genotype according to alleles.
|
||||
* @param sampleName
|
||||
* @param alleles
|
||||
* @param log10PError the confidence in these alleles
|
||||
* @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
|
||||
*/
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError, double[] log10Likelihoods) {
|
||||
this(sampleName, alleles, log10PError, null, null, false, log10Likelihoods);
|
||||
}
|
||||
boolean isHom();
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError) {
|
||||
this(sampleName, alleles, log10PError, null, null, false);
|
||||
}
|
||||
boolean isHomRef();
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles) {
|
||||
this(sampleName, alleles, NO_LOG10_PERROR, null, null, false);
|
||||
}
|
||||
boolean isHomVar();
|
||||
|
||||
public Genotype(String sampleName, Genotype parent) {
|
||||
this(sampleName, parent.getAlleles(), parent.getLog10PError(), parent.getFilters(), parent.getAttributes(), parent.isPhased());
|
||||
}
|
||||
boolean isHet();
|
||||
|
||||
boolean isNoCall();
|
||||
|
||||
boolean isCalled();
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Partial-cloning routines (because Genotype is immutable).
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
boolean isMixed();
|
||||
|
||||
public static Genotype modifyName(Genotype g, String name) {
|
||||
return new Genotype(name, g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||
}
|
||||
|
||||
public static Genotype modifyAttributes(Genotype g, Map<String, Object> attributes) {
|
||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
|
||||
}
|
||||
|
||||
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
|
||||
return new Genotype(g.getSampleName(), alleles, g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the alleles for this genotype
|
||||
*/
|
||||
public List<Allele> getAlleles() {
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles(Allele allele) {
|
||||
List<Allele> al = new ArrayList<Allele>();
|
||||
for ( Allele a : alleles )
|
||||
if ( a.equals(allele) )
|
||||
al.add(a);
|
||||
|
||||
return Collections.unmodifiableList(al);
|
||||
}
|
||||
|
||||
public Allele getAllele(int i) {
|
||||
if ( getType() == Type.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
public boolean isPhased() { return isPhased; }
|
||||
|
||||
/**
|
||||
* @return the ploidy of this genotype. 0 if the site is no-called.
|
||||
*/
|
||||
public int getPloidy() {
|
||||
return alleles.size();
|
||||
}
|
||||
|
||||
public enum Type {
|
||||
NO_CALL,
|
||||
HOM_REF,
|
||||
HET,
|
||||
HOM_VAR,
|
||||
UNAVAILABLE,
|
||||
MIXED // no-call and call in the same genotype
|
||||
}
|
||||
|
||||
public Type getType() {
|
||||
if ( type == null ) {
|
||||
type = determineType();
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
protected Type determineType() {
|
||||
if ( alleles.size() == 0 )
|
||||
return Type.UNAVAILABLE;
|
||||
|
||||
boolean sawNoCall = false, sawMultipleAlleles = false;
|
||||
Allele observedAllele = null;
|
||||
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isNoCall() )
|
||||
sawNoCall = true;
|
||||
else if ( observedAllele == null )
|
||||
observedAllele = allele;
|
||||
else if ( !allele.equals(observedAllele) )
|
||||
sawMultipleAlleles = true;
|
||||
}
|
||||
|
||||
if ( sawNoCall ) {
|
||||
if ( observedAllele == null )
|
||||
return Type.NO_CALL;
|
||||
return Type.MIXED;
|
||||
}
|
||||
|
||||
if ( observedAllele == null )
|
||||
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
|
||||
|
||||
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHom() { return isHomRef() || isHomVar(); }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomRef() { return getType() == Type.HOM_REF; }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
|
||||
|
||||
/**
|
||||
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHet() { return getType() == Type.HET; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
|
||||
*/
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of both calls and no-calls.
|
||||
*/
|
||||
public boolean isMixed() { return getType() == Type.MIXED; }
|
||||
|
||||
/**
|
||||
* @return true if the type of this genotype is set.
|
||||
*/
|
||||
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||
boolean isAvailable();
|
||||
|
||||
//
|
||||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||
//
|
||||
public boolean hasLikelihoods() {
|
||||
return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
|
||||
(hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
|
||||
}
|
||||
boolean hasLikelihoods();
|
||||
|
||||
/**
|
||||
* Convenience function that returns a string representation of the PL field of this
|
||||
* genotype, or . if none is available.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getLikelihoodsString() {
|
||||
GenotypeLikelihoods gl = getLikelihoods();
|
||||
return gl == null ? VCFConstants.MISSING_VALUE_v4 : gl.toString();
|
||||
}
|
||||
|
||||
public GenotypeLikelihoods getLikelihoods() {
|
||||
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
|
||||
if ( x != null )
|
||||
return x;
|
||||
else {
|
||||
x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
String getLikelihoodsString();
|
||||
|
||||
private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x instanceof String ) {
|
||||
if ( asPL )
|
||||
return GenotypeLikelihoods.fromPLField((String)x);
|
||||
else
|
||||
return GenotypeLikelihoods.fromGLField((String)x);
|
||||
}
|
||||
else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x;
|
||||
else return null;
|
||||
}
|
||||
GenotypeLikelihoods getLikelihoods();
|
||||
|
||||
public void validate() {
|
||||
if ( alleles.size() == 0) return;
|
||||
String getGenotypeString();
|
||||
|
||||
// int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele == null )
|
||||
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
||||
// nNoCalls += allele.isNoCall() ? 1 : 0;
|
||||
}
|
||||
String getGenotypeString(boolean ignoreRefState);
|
||||
|
||||
// Technically, the spec does allow for the below case so this is not an illegal state
|
||||
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
}
|
||||
String toBriefString();
|
||||
|
||||
public String getGenotypeString() {
|
||||
return getGenotypeString(true);
|
||||
}
|
||||
|
||||
public String getGenotypeString(boolean ignoreRefState) {
|
||||
if ( alleles.size() == 0 )
|
||||
return null;
|
||||
|
||||
// Notes:
|
||||
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
||||
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
|
||||
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
|
||||
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
|
||||
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
|
||||
}
|
||||
|
||||
private List<String> getAlleleStrings() {
|
||||
List<String> al = new ArrayList<String>();
|
||||
for ( Allele a : alleles )
|
||||
al.add(a.getBaseString());
|
||||
|
||||
return al;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
int Q = getPhredScaledQual();
|
||||
return String.format("[%s %s Q%s %s]", getSampleName(), getGenotypeString(false),
|
||||
Q == -1 ? "." : String.format("%2d",Q), sortedString(getAttributes()));
|
||||
}
|
||||
|
||||
public String toBriefString() {
|
||||
return String.format("%s:Q%d", getGenotypeString(false), getPhredScaledQual());
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other) {
|
||||
return sameGenotype(other, true);
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other, boolean ignorePhase) {
|
||||
if (getPloidy() != other.getPloidy())
|
||||
return false; // gotta have the same number of allele to be equal
|
||||
|
||||
// By default, compare the elements in the lists of alleles, element-by-element
|
||||
Collection<Allele> thisAlleles = this.getAlleles();
|
||||
Collection<Allele> otherAlleles = other.getAlleles();
|
||||
|
||||
if (ignorePhase) { // do not care about order, only identity of Alleles
|
||||
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
|
||||
otherAlleles = new TreeSet<Allele>(otherAlleles);
|
||||
}
|
||||
|
||||
return thisAlleles.equals(otherAlleles);
|
||||
}
|
||||
|
||||
/**
|
||||
* a utility method for generating sorted strings from a map key set.
|
||||
* @param c the map
|
||||
* @param <T> the key type
|
||||
* @param <V> the value type
|
||||
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
|
||||
*/
|
||||
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
|
||||
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
|
||||
List<T> t = new ArrayList<T>(c.keySet());
|
||||
Collections.sort(t);
|
||||
|
||||
List<String> pairs = new ArrayList<String>();
|
||||
for (T k : t) {
|
||||
pairs.add(k + "=" + c.get(k));
|
||||
}
|
||||
|
||||
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
|
||||
}
|
||||
boolean sameGenotype(Genotype other);
|
||||
|
||||
boolean sameGenotype(Genotype other, boolean ignorePhase);
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// get routines to access context info fields
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
public String getSampleName() { return commonInfo.getName(); }
|
||||
public Set<String> getFilters() { return commonInfo.getFilters(); }
|
||||
public Set<String> getFiltersMaybeNull() { return commonInfo.getFiltersMaybeNull(); }
|
||||
public boolean isFiltered() { return commonInfo.isFiltered(); }
|
||||
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
|
||||
public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
|
||||
public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
|
||||
public double getLog10PError() { return commonInfo.getLog10PError(); }
|
||||
String getSampleName();
|
||||
|
||||
/**
|
||||
* Returns a phred-scaled quality score, or -1 if none is available
|
||||
* @return
|
||||
*/
|
||||
public int getPhredScaledQual() {
|
||||
final int i = (int)Math.round(commonInfo.getPhredScaledQual());
|
||||
return i < 0 ? -1 : i;
|
||||
}
|
||||
public int[] getPL();
|
||||
public boolean hasPL();
|
||||
|
||||
public Map<String, Object> getAttributes() { return commonInfo.getAttributes(); }
|
||||
public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); }
|
||||
public Object getAttribute(String key) { return commonInfo.getAttribute(key); }
|
||||
|
||||
public Object getAttribute(String key, Object defaultValue) {
|
||||
return commonInfo.getAttribute(key, defaultValue);
|
||||
}
|
||||
public int getDP();
|
||||
public boolean hasDP();
|
||||
|
||||
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||
public int[] getAD();
|
||||
public boolean hasAD();
|
||||
|
||||
/**
|
||||
* comparable genotypes -> compareTo on the sample names
|
||||
* @param genotype
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(final Genotype genotype) {
|
||||
return getSampleName().compareTo(genotype.getSampleName());
|
||||
}
|
||||
public int getGQ();
|
||||
public boolean hasGQ();
|
||||
|
||||
List<String> getFilters();
|
||||
boolean isFiltered();
|
||||
boolean filtersWereApplied();
|
||||
|
||||
@Deprecated
|
||||
boolean hasLog10PError();
|
||||
|
||||
@Deprecated
|
||||
double getLog10PError();
|
||||
|
||||
@Deprecated
|
||||
int getPhredScaledQual();
|
||||
|
||||
public boolean hasAttribute(final String key);
|
||||
|
||||
Map<String, Object> getExtendedAttributes();
|
||||
|
||||
Object getAttribute(String key);
|
||||
Object getAttribute(final String key, final Object defaultValue);
|
||||
|
||||
@Deprecated
|
||||
String getAttributeAsString(String key, String defaultValue);
|
||||
|
||||
@Deprecated
|
||||
int getAttributeAsInt(String key, int defaultValue);
|
||||
|
||||
@Deprecated
|
||||
double getAttributeAsDouble(String key, double defaultValue);
|
||||
}
|
||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -36,6 +37,8 @@ import java.util.*;
|
|||
* @author Mark DePristo
|
||||
*/
|
||||
public final class GenotypeBuilder {
|
||||
public static boolean MAKE_FAST_BY_DEFAULT = false;
|
||||
|
||||
private String sampleName = null;
|
||||
private List<Allele> alleles = null;
|
||||
|
||||
|
|
@ -46,8 +49,41 @@ public final class GenotypeBuilder {
|
|||
private int[] PL = null;
|
||||
private Map<String, Object> extendedAttributes = null;
|
||||
|
||||
private boolean useFast = MAKE_FAST_BY_DEFAULT;
|
||||
|
||||
private final static Map<String, Object> NO_ATTRIBUTES =
|
||||
new HashMap<String, Object>(0);
|
||||
Collections.unmodifiableMap(new HashMap<String, Object>(0));
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Factory methods
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
public final static Genotype create(final String sampleName, final List<Allele> alleles) {
|
||||
return new GenotypeBuilder(sampleName, alleles).make();
|
||||
}
|
||||
|
||||
public final static Genotype create(final String sampleName,
|
||||
final List<Allele> alleles,
|
||||
final Map<String, Object> attributes) {
|
||||
return new GenotypeBuilder(sampleName, alleles).attributes(attributes).make();
|
||||
}
|
||||
|
||||
protected final static Genotype create(final String sampleName,
|
||||
final List<Allele> alleles,
|
||||
final double[] gls) {
|
||||
return new GenotypeBuilder(sampleName, alleles).PL(gls).make();
|
||||
}
|
||||
|
||||
public final static Genotype create(final String sampleName,
|
||||
final List<Allele> alleles,
|
||||
final double log10Perror,
|
||||
final Map<String, Object> attributes) {
|
||||
return new GenotypeBuilder(sampleName, alleles)
|
||||
.GQ(log10Perror == SlowGenotype.NO_LOG10_PERROR ? -1 : (int)(log10Perror * -10))
|
||||
.attributes(attributes).make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a empty builder. Both a sampleName and alleles must be provided
|
||||
|
|
@ -78,7 +114,7 @@ public final class GenotypeBuilder {
|
|||
* Create a new builder starting with the values in Genotype g
|
||||
* @param g
|
||||
*/
|
||||
public GenotypeBuilder(final FastGenotype g) {
|
||||
public GenotypeBuilder(final Genotype g) {
|
||||
copy(g);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +123,7 @@ public final class GenotypeBuilder {
|
|||
* @param g
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder copy(final FastGenotype g) {
|
||||
public GenotypeBuilder copy(final Genotype g) {
|
||||
name(g.getSampleName());
|
||||
alleles(g.getAlleles());
|
||||
phased(g.isPhased());
|
||||
|
|
@ -125,9 +161,31 @@ public final class GenotypeBuilder {
|
|||
* @return a newly minted Genotype object with values provided from this builder
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
public FastGenotype make() {
|
||||
final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
|
||||
return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, ea);
|
||||
public Genotype make() {
|
||||
if ( useFast ) {
|
||||
final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
|
||||
return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, ea);
|
||||
} else {
|
||||
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
|
||||
if ( extendedAttributes != null ) attributes.putAll(extendedAttributes);
|
||||
final double log10PError = GQ != -1 ? GQ / -10.0 : SlowGenotype.NO_LOG10_PERROR;
|
||||
|
||||
Set<String> filters = Collections.emptySet();
|
||||
if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
filters = new LinkedHashSet<String>((List<String>)extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY));
|
||||
attributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
}
|
||||
|
||||
if ( DP != -1 ) attributes.put(VCFConstants.DEPTH_KEY, DP);
|
||||
if ( AD != null ) attributes.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD);
|
||||
final double[] log10likelihoods = PL != null ? GenotypeLikelihoods.fromPLs(PL).getAsVector() : null;
|
||||
return new SlowGenotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10likelihoods);
|
||||
}
|
||||
}
|
||||
|
||||
public GenotypeBuilder useFast(boolean useFast) {
|
||||
this.useFast = useFast;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Requires({"sampleName != null"})
|
||||
|
|
@ -152,12 +210,21 @@ public final class GenotypeBuilder {
|
|||
}
|
||||
|
||||
@Requires({"GQ >= -1"})
|
||||
@Ensures({"this.GQ == GQ"})
|
||||
@Ensures({"this.GQ == GQ", "this.GQ >= -1"})
|
||||
public GenotypeBuilder GQ(final int GQ) {
|
||||
this.GQ = GQ;
|
||||
return this;
|
||||
}
|
||||
|
||||
public GenotypeBuilder GQ(final double GQ) {
|
||||
return GQ((int)Math.round(GQ));
|
||||
}
|
||||
|
||||
public GenotypeBuilder noGQ() { GQ = -1; return this; }
|
||||
public GenotypeBuilder noAD() { AD = null; return this; }
|
||||
public GenotypeBuilder noDP() { DP = -1; return this; }
|
||||
public GenotypeBuilder noPL() { PL = null; return this; }
|
||||
|
||||
@Requires({"DP >= -1"})
|
||||
@Ensures({"this.DP == DP"})
|
||||
public GenotypeBuilder DP(final int DP) {
|
||||
|
|
@ -179,8 +246,15 @@ public final class GenotypeBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
@Requires("PL == null || PL.length > 0")
|
||||
@Ensures({"this.PL == PL"})
|
||||
public GenotypeBuilder PL(final double[] GLs) {
|
||||
this.PL = GenotypeLikelihoods.fromLog10Likelihoods(GLs).getAsPLs();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Requires("attributes != null")
|
||||
@Ensures("extendedAttributes != null")
|
||||
@Ensures("attributes.isEmpty() || extendedAttributes != null")
|
||||
public GenotypeBuilder attributes(final Map<String, Object> attributes) {
|
||||
for ( Map.Entry<String, Object> pair : attributes.entrySet() )
|
||||
attribute(pair.getKey(), pair.getValue());
|
||||
|
|
@ -195,4 +269,15 @@ public final class GenotypeBuilder {
|
|||
extendedAttributes.put(key, value);
|
||||
return this;
|
||||
}
|
||||
|
||||
public GenotypeBuilder filters(final List<String> filters) {
|
||||
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
|
||||
// TODO
|
||||
return this;
|
||||
}
|
||||
|
||||
public GenotypeBuilder maxAttributes(final int i) {
|
||||
// TODO
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -122,25 +122,25 @@ public class GenotypeLikelihoods {
|
|||
|
||||
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
|
||||
//Returns null in case of missing likelihoods
|
||||
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
|
||||
public EnumMap<GenotypeType,Double> getAsMap(boolean normalizeFromLog10){
|
||||
//Make sure that the log10likelihoods are set
|
||||
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
|
||||
if(likelihoods == null)
|
||||
return null;
|
||||
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
|
||||
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
|
||||
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
|
||||
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
|
||||
EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[GenotypeType.HOM_REF.ordinal()-1]);
|
||||
likelihoodsMap.put(GenotypeType.HET,likelihoods[GenotypeType.HET.ordinal()-1]);
|
||||
likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[GenotypeType.HOM_VAR.ordinal() - 1]);
|
||||
return likelihoodsMap;
|
||||
}
|
||||
|
||||
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||
public double getLog10GQ(Genotype.Type genotype){
|
||||
return getQualFromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||
public double getLog10GQ(GenotypeType genotype){
|
||||
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||
}
|
||||
|
||||
public static double getQualFromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||
if(likelihoods == null)
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
/**
|
||||
* Summary types for Genotype objects
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public enum GenotypeType {
|
||||
/** The sample is no-called (all alleles are NO_CALL */
|
||||
NO_CALL,
|
||||
/** The sample is homozygous reference */
|
||||
HOM_REF,
|
||||
/** The sample is heterozygous, with at least one ref and at least one one alt in any order */
|
||||
HET,
|
||||
/** All alleles are non-reference */
|
||||
HOM_VAR,
|
||||
/** There is no allele data availble for this sample (alleles.isEmpty) */
|
||||
UNAVAILABLE,
|
||||
/** Some chromosomes are NO_CALL and others are called */
|
||||
MIXED // no-call and call in the same genotype
|
||||
}
|
||||
|
|
@ -272,6 +272,17 @@ public class GenotypesContext implements List<Genotype> {
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Lazy methods
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
public boolean isLazyWithData() {
|
||||
return this instanceof LazyGenotypesContext &&
|
||||
((LazyGenotypesContext)this).getUnparsedGenotypeData() != null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Map methods
|
||||
|
|
|
|||
|
|
@ -0,0 +1,445 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class encompasses all the basic information about a genotype. It is immutable.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class SlowGenotype implements Genotype {
|
||||
public final static String PHASED_ALLELE_SEPARATOR = "|";
|
||||
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
|
||||
|
||||
protected CommonInfo commonInfo;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
||||
protected GenotypeType type = null;
|
||||
|
||||
protected boolean isPhased = false;
|
||||
|
||||
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased) {
|
||||
// this(sampleName, alleles, log10PError, filters, attributes, isPhased, null);
|
||||
// }
|
||||
|
||||
protected SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||
if ( alleles == null || alleles.isEmpty() )
|
||||
this.alleles = Collections.emptyList();
|
||||
else
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
|
||||
if ( log10Likelihoods != null )
|
||||
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||
this.isPhased = isPhased;
|
||||
validate();
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Creates a new Genotype for sampleName with genotype according to alleles.
|
||||
// * @param sampleName
|
||||
// * @param alleles
|
||||
// * @param log10PError the confidence in these alleles
|
||||
// * @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
|
||||
// */
|
||||
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, double[] log10Likelihoods) {
|
||||
// this(sampleName, alleles, log10PError, null, null, false, log10Likelihoods);
|
||||
// }
|
||||
//
|
||||
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError) {
|
||||
// this(sampleName, alleles, log10PError, null, null, false);
|
||||
// }
|
||||
//
|
||||
// public SlowGenotype(String sampleName, List<Allele> alleles) {
|
||||
// this(sampleName, alleles, NO_LOG10_PERROR, null, null, false);
|
||||
// }
|
||||
//
|
||||
// public SlowGenotype(String sampleName, SlowGenotype parent) {
|
||||
// this(sampleName, parent.getAlleles(), parent.getLog10PError(), parent.getFilters(), parent.getAttributes(), parent.isPhased());
|
||||
// }
|
||||
//
|
||||
//
|
||||
//
|
||||
// // ---------------------------------------------------------------------------------------------------------
|
||||
// //
|
||||
// // Partial-cloning routines (because Genotype is immutable).
|
||||
// //
|
||||
// // ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// public static SlowGenotype modifyName(SlowGenotype g, String name) {
|
||||
// return new SlowGenotype(name, g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||
// }
|
||||
//
|
||||
// public static SlowGenotype modifyAttributes(SlowGenotype g, Map<String, Object> attributes) {
|
||||
// return new SlowGenotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
|
||||
// }
|
||||
//
|
||||
// public static SlowGenotype modifyAlleles(SlowGenotype g, List<Allele> alleles) {
|
||||
// return new SlowGenotype(g.getSampleName(), alleles, g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||
// }
|
||||
|
||||
/**
|
||||
* @return the alleles for this genotype
|
||||
*/
|
||||
public List<Allele> getAlleles() {
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public int countAllele(final Allele allele) {
|
||||
int c = 0;
|
||||
for ( final Allele a : alleles )
|
||||
if ( a.equals(allele) )
|
||||
c++;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
public Allele getAllele(int i) {
|
||||
if ( getType() == GenotypeType.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
public boolean isPhased() { return isPhased; }
|
||||
|
||||
/**
|
||||
* @return the ploidy of this genotype. 0 if the site is no-called.
|
||||
*/
|
||||
public int getPloidy() {
|
||||
return alleles.size();
|
||||
}
|
||||
|
||||
public GenotypeType getType() {
|
||||
if ( type == null ) {
|
||||
type = determineType();
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
protected GenotypeType determineType() {
|
||||
if ( alleles.size() == 0 )
|
||||
return GenotypeType.UNAVAILABLE;
|
||||
|
||||
boolean sawNoCall = false, sawMultipleAlleles = false;
|
||||
Allele observedAllele = null;
|
||||
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isNoCall() )
|
||||
sawNoCall = true;
|
||||
else if ( observedAllele == null )
|
||||
observedAllele = allele;
|
||||
else if ( !allele.equals(observedAllele) )
|
||||
sawMultipleAlleles = true;
|
||||
}
|
||||
|
||||
if ( sawNoCall ) {
|
||||
if ( observedAllele == null )
|
||||
return GenotypeType.NO_CALL;
|
||||
return GenotypeType.MIXED;
|
||||
}
|
||||
|
||||
if ( observedAllele == null )
|
||||
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
|
||||
|
||||
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHom() { return isHomRef() || isHomVar(); }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
|
||||
|
||||
/**
|
||||
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHet() { return getType() == GenotypeType.HET; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
|
||||
*/
|
||||
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of both calls and no-calls.
|
||||
*/
|
||||
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
|
||||
|
||||
/**
|
||||
* @return true if the type of this genotype is set.
|
||||
*/
|
||||
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
|
||||
|
||||
//
|
||||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||
//
|
||||
public boolean hasLikelihoods() {
|
||||
return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
|
||||
(hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that returns a string representation of the PL field of this
|
||||
* genotype, or . if none is available.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getLikelihoodsString() {
|
||||
GenotypeLikelihoods gl = getLikelihoods();
|
||||
return gl == null ? VCFConstants.MISSING_VALUE_v4 : gl.toString();
|
||||
}
|
||||
|
||||
public GenotypeLikelihoods getLikelihoods() {
|
||||
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
|
||||
if ( x != null )
|
||||
return x;
|
||||
else {
|
||||
x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x instanceof String ) {
|
||||
if ( asPL )
|
||||
return GenotypeLikelihoods.fromPLField((String)x);
|
||||
else
|
||||
return GenotypeLikelihoods.fromGLField((String)x);
|
||||
}
|
||||
else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x;
|
||||
else return null;
|
||||
}
|
||||
|
||||
public void validate() {
|
||||
if ( alleles.size() == 0) return;
|
||||
|
||||
// int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele == null )
|
||||
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
||||
// nNoCalls += allele.isNoCall() ? 1 : 0;
|
||||
}
|
||||
|
||||
// Technically, the spec does allow for the below case so this is not an illegal state
|
||||
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
}
|
||||
|
||||
public String getGenotypeString() {
|
||||
return getGenotypeString(true);
|
||||
}
|
||||
|
||||
public String getGenotypeString(boolean ignoreRefState) {
|
||||
if ( alleles.size() == 0 )
|
||||
return null;
|
||||
|
||||
// Notes:
|
||||
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
||||
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
|
||||
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
|
||||
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
|
||||
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
|
||||
}
|
||||
|
||||
private List<String> getAlleleStrings() {
|
||||
List<String> al = new ArrayList<String>();
|
||||
for ( Allele a : alleles )
|
||||
al.add(a.getBaseString());
|
||||
|
||||
return al;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
int Q = getPhredScaledQual();
|
||||
return String.format("[%s %s Q%s %s]", getSampleName(), getGenotypeString(false),
|
||||
Q == -1 ? "." : String.format("%2d",Q), sortedString(getExtendedAttributes()));
|
||||
}
|
||||
|
||||
public String toBriefString() {
|
||||
return String.format("%s:Q%d", getGenotypeString(false), getPhredScaledQual());
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other) {
|
||||
return sameGenotype(other, true);
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other, boolean ignorePhase) {
|
||||
if (getPloidy() != other.getPloidy())
|
||||
return false; // gotta have the same number of allele to be equal
|
||||
|
||||
// By default, compare the elements in the lists of alleles, element-by-element
|
||||
Collection<Allele> thisAlleles = this.getAlleles();
|
||||
Collection<Allele> otherAlleles = other.getAlleles();
|
||||
|
||||
if (ignorePhase) { // do not care about order, only identity of Alleles
|
||||
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
|
||||
otherAlleles = new TreeSet<Allele>(otherAlleles);
|
||||
}
|
||||
|
||||
return thisAlleles.equals(otherAlleles);
|
||||
}
|
||||
|
||||
/**
|
||||
* a utility method for generating sorted strings from a map key set.
|
||||
* @param c the map
|
||||
* @param <T> the key type
|
||||
* @param <V> the value type
|
||||
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
|
||||
*/
|
||||
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
|
||||
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
|
||||
List<T> t = new ArrayList<T>(c.keySet());
|
||||
Collections.sort(t);
|
||||
|
||||
List<String> pairs = new ArrayList<String>();
|
||||
for (T k : t) {
|
||||
pairs.add(k + "=" + c.get(k));
|
||||
}
|
||||
|
||||
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines to access context info fields
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
public String getSampleName() { return commonInfo.getName(); }
|
||||
public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
|
||||
public boolean isFiltered() { return commonInfo.isFiltered(); }
|
||||
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
|
||||
public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
|
||||
public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
|
||||
public double getLog10PError() { return commonInfo.getLog10PError(); }
|
||||
|
||||
/**
|
||||
* Returns a phred-scaled quality score, or -1 if none is available
|
||||
* @return
|
||||
*/
|
||||
public int getPhredScaledQual() {
|
||||
if ( commonInfo.hasLog10PError() )
|
||||
return (int)Math.round(commonInfo.getPhredScaledQual());
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); }
|
||||
|
||||
@Override
|
||||
public Object getAttribute(String key) { return commonInfo.getAttribute(key); }
|
||||
|
||||
public Object getAttribute(String key, Object defaultValue) {
|
||||
return commonInfo.getAttribute(key, defaultValue);
|
||||
}
|
||||
|
||||
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||
|
||||
/**
|
||||
* comparable genotypes -> compareTo on the sample names
|
||||
* @param genotype
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(final Genotype genotype) {
|
||||
return getSampleName().compareTo(genotype.getSampleName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] getPL() {
|
||||
return hasPL() ? getLikelihoods().getAsPLs() : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPL() {
|
||||
return hasLikelihoods();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDP() {
|
||||
return getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasDP() {
|
||||
return hasAttribute(VCFConstants.DEPTH_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] getAD() {
|
||||
if ( hasAD() ) {
|
||||
return (int[])getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasAD() {
|
||||
return hasAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getGQ() {
|
||||
return getPhredScaledQual();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasGQ() {
|
||||
return hasLikelihoods();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> getExtendedAttributes() {
|
||||
final Map<String, Object> ea = new LinkedHashMap<String, Object>(commonInfo.getAttributes());
|
||||
for ( final String primary : FastGenotype.PRIMARY_KEYS )
|
||||
ea.remove(primary);
|
||||
return ea;
|
||||
}
|
||||
}
|
||||
|
|
@ -327,19 +327,36 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make();
|
||||
}
|
||||
/**
|
||||
* This method subsets down to a set of samples.
|
||||
*
|
||||
* At the same time returns the alleles to just those in use by the samples,
|
||||
* if rederiveAllelesFromGenotypes is true, otherwise the full set of alleles
|
||||
* in this VC is returned as the set of alleles in the subContext, even if
|
||||
* some of those alleles aren't in the samples
|
||||
*
|
||||
* @param sampleNames
|
||||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) {
|
||||
return this; // fast path when you don't have any work to do
|
||||
} else {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||
return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make();
|
||||
if ( rederiveAllelesFromGenotypes )
|
||||
builder.alleles(allelesOfGenotypes(newGenotypes));
|
||||
else {
|
||||
builder.alleles(alleles);
|
||||
}
|
||||
|
||||
return builder.genotypes(newGenotypes).make();
|
||||
}
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSample(String sampleName) {
|
||||
return subContextFromSamples(Collections.singleton(sampleName));
|
||||
return subContextFromSamples(Collections.singleton(sampleName), true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -892,7 +909,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
|
||||
|
||||
for ( final Genotype g : genotypes ) {
|
||||
n += g.getAlleles(a).size();
|
||||
n += g.countAllele(a);
|
||||
}
|
||||
|
||||
return n;
|
||||
|
|
@ -922,7 +939,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
|
||||
private void calculateGenotypeCounts() {
|
||||
if ( genotypeCounts == null ) {
|
||||
genotypeCounts = new int[Genotype.Type.values().length];
|
||||
genotypeCounts = new int[GenotypeType.values().length];
|
||||
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
genotypeCounts[g.getType().ordinal()]++;
|
||||
|
|
@ -937,7 +954,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getNoCallCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.NO_CALL.ordinal()];
|
||||
return genotypeCounts[GenotypeType.NO_CALL.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -947,7 +964,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getHomRefCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HOM_REF.ordinal()];
|
||||
return genotypeCounts[GenotypeType.HOM_REF.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -957,7 +974,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getHetCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HET.ordinal()];
|
||||
return genotypeCounts[GenotypeType.HET.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -967,7 +984,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getHomVarCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
|
||||
return genotypeCounts[GenotypeType.HOM_VAR.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -977,7 +994,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getMixedCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.MIXED.ordinal()];
|
||||
return genotypeCounts[GenotypeType.MIXED.ordinal()];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -1412,8 +1429,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) {
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getAttributes(), header);
|
||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), map, g.isPhased());
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header);
|
||||
return new GenotypeBuilder(g).attributes(map).make();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class VariantContextUtils {
|
|||
|
||||
// if there are alternate alleles, record the relevant tags
|
||||
if ( vc.getAlternateAlleles().size() > 0 ) {
|
||||
ArrayList<String> alleleFreqs = new ArrayList<String>();
|
||||
ArrayList<Double> alleleFreqs = new ArrayList<Double>();
|
||||
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
ArrayList<Integer> foundersAlleleCounts = new ArrayList<Integer>();
|
||||
double totalFoundersChromosomes = (double)vc.getCalledChrCount(founderIds);
|
||||
|
|
@ -109,10 +109,10 @@ public class VariantContextUtils {
|
|||
alleleCounts.add(vc.getCalledChrCount(allele));
|
||||
foundersAlleleCounts.add(foundersAltChromosomes);
|
||||
if ( AN == 0 ) {
|
||||
alleleFreqs.add("0.0");
|
||||
alleleFreqs.add(0.0);
|
||||
} else {
|
||||
// todo -- this is a performance problem
|
||||
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes));
|
||||
final Double freq = Double.valueOf(String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes)));
|
||||
alleleFreqs.add(freq);
|
||||
}
|
||||
}
|
||||
|
|
@ -167,10 +167,10 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
public static Genotype removePLs(Genotype g) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
|
||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
||||
if ( g.hasLikelihoods() )
|
||||
return new GenotypeBuilder(g).noPL().make();
|
||||
else
|
||||
return g;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -257,8 +257,7 @@ public class VariantContextUtils {
|
|||
newGenotypeAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
}
|
||||
genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getLog10PError(),
|
||||
g.getFilters(), g.getAttributes(), g.isPhased()));
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -475,9 +474,9 @@ public class VariantContextUtils {
|
|||
// Genotypes
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
|
||||
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(),
|
||||
genotypeAttributes, g.isPhased()));
|
||||
// TODO -- fixme
|
||||
//Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
|
||||
//genotypes.add(new GenotypeBuilder(g).attributes(genotypeAttributes).make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).attributes(attributes);
|
||||
|
|
@ -833,7 +832,7 @@ public class VariantContextUtils {
|
|||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -878,7 +877,7 @@ public class VariantContextUtils {
|
|||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
|
||||
|
|
@ -1073,7 +1072,7 @@ public class VariantContextUtils {
|
|||
|
||||
if ( uniqifySamples || alleleMapping.needsRemapping() ) {
|
||||
final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
|
||||
newG = new Genotype(name, alleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased());
|
||||
newG = new GenotypeBuilder(g).name(name).alleles(alleles).make();
|
||||
}
|
||||
|
||||
mergedGenotypes.add(newG);
|
||||
|
|
@ -1113,7 +1112,7 @@ public class VariantContextUtils {
|
|||
newAllele = Allele.NO_CALL;
|
||||
newAlleles.add(newAllele);
|
||||
}
|
||||
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
|
||||
|
|
@ -1126,11 +1125,11 @@ public class VariantContextUtils {
|
|||
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||
for ( Map.Entry<String, Object> attr : genotype.getAttributes().entrySet() ) {
|
||||
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
||||
if ( allowedAttributes.contains(attr.getKey()) )
|
||||
attrs.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
newGenotypes.add(Genotype.modifyAttributes(genotype, attrs));
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
|
||||
|
|
@ -1247,7 +1246,7 @@ public class VariantContextUtils {
|
|||
for ( int k = 0; k < oldGTs.size(); k++ ) {
|
||||
final Genotype g = oldGTs.get(sampleIndices.get(k));
|
||||
if ( !g.hasLikelihoods() ) {
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1268,51 +1267,35 @@ public class VariantContextUtils {
|
|||
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
}
|
||||
else {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
|
||||
if ( numNewAltAlleles == 0 )
|
||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
gb.noPL();
|
||||
else
|
||||
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
|
||||
gb.PL(newLikelihoods);
|
||||
|
||||
// if we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
|
||||
else
|
||||
newGTs.add(assignDiploidGenotype(g, newLikelihoods, allelesToUse, attrs));
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
}
|
||||
else {
|
||||
// find the genotype with maximum likelihoods
|
||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
|
||||
if ( numNewAltAlleles != 0 ) gb.GQ(-10 * GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
||||
}
|
||||
newGTs.add(gb.make());
|
||||
}
|
||||
}
|
||||
|
||||
return newGTs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
||||
*
|
||||
* @param originalGT the original genotype
|
||||
* @param newLikelihoods the PL array
|
||||
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
|
||||
* @param attrs the annotations to use when creating the genotype
|
||||
*
|
||||
* @return genotype
|
||||
*/
|
||||
private static Genotype assignDiploidGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
|
||||
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||
|
||||
// find the genotype with maximum likelihoods
|
||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
myAlleles.add(allelesToUse.get(alleles.alleleIndex1));
|
||||
myAlleles.add(allelesToUse.get(alleles.alleleIndex2));
|
||||
|
||||
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
|
||||
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true iff VC is an non-complex indel where every allele represents an expansion or
|
||||
* contraction of a series of identical bases in the reference.
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
infoMap.put("isHet", g.isHet() ? "1" : "0");
|
||||
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
|
||||
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getPhredScaledQual());
|
||||
for ( Map.Entry<String, Object> e : g.getAttributes().entrySet() ) {
|
||||
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
|
||||
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
|
||||
infoMap.put(e.getKey(), e.getValue());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
|
|
@ -38,6 +39,7 @@ import java.io.*;
|
|||
import java.util.*;
|
||||
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
private final static boolean FULLY_DECODE = true;
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
|
||||
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
|
||||
|
|
@ -47,6 +49,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private final boolean doNotWriteGenotypes;
|
||||
|
||||
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
|
||||
IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
||||
public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
|
||||
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
|
||||
|
|
@ -100,7 +103,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
@Override
|
||||
public void add( final VariantContext initialVC ) {
|
||||
final VariantContext vc = initialVC.fullyDecode(header);
|
||||
final VariantContext vc = FULLY_DECODE ? initialVC.fullyDecode(header) : initialVC;
|
||||
super.add(vc); // allow on the fly indexing
|
||||
|
||||
try {
|
||||
|
|
@ -162,7 +165,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
// info fields
|
||||
final int nAlleles = vc.getNAlleles();
|
||||
final int nInfo = vc.getAttributes().size();
|
||||
final int nGenotypeFormatFields = VCFWriter.calcVCFGenotypeKeys(vc, header).size();
|
||||
final int nGenotypeFormatFields = getNGenotypeFormatFields(vc);
|
||||
final int nSamples = vc.getNSamples();
|
||||
|
||||
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x00FF), BCF2Type.INT32);
|
||||
|
|
@ -176,6 +179,30 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
|
||||
if ( vc.getGenotypes().isLazyWithData() ) {
|
||||
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
|
||||
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
|
||||
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to get the nGenotypeFields as efficiently as possible.
|
||||
*
|
||||
* If this is a lazy BCF2 object just grab the field count from there,
|
||||
* otherwise do the whole counting by types test in the actual data
|
||||
*
|
||||
* @param vc
|
||||
* @return
|
||||
*/
|
||||
private final int getNGenotypeFormatFields(final VariantContext vc) {
|
||||
final BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
return lazyData != null ? lazyData.nGenotypeFields : VCFWriter.calcVCFGenotypeKeys(vc, header).size();
|
||||
}
|
||||
|
||||
private void buildID( VariantContext vc ) throws IOException {
|
||||
encoder.encodeTyped(vc.getID(), BCF2Type.CHAR);
|
||||
}
|
||||
|
|
@ -206,86 +233,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
}
|
||||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
addGenotypes(vc);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
addGQ(vc);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
addGenotypeFilters(vc);
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
addPLs(vc);
|
||||
} else {
|
||||
addGenericGenotypeField(vc, field);
|
||||
}
|
||||
}
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Type matching routines between VCF and BCF
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
|
||||
assert metaData != null; // field is supposed to be in header
|
||||
|
||||
int nFields = metaData.getCount(vc.getNAlleles() - 1);
|
||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
||||
} else {
|
||||
return nFields;
|
||||
}
|
||||
}
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final String field, final VariantContext vc) {
|
||||
int size = -1;
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
|
||||
for ( final Genotype g : gc ) {
|
||||
final Object o = g.getAttribute(field);
|
||||
if ( o == null ) continue;
|
||||
if ( o instanceof List ) {
|
||||
// only do compute if first value is of type list
|
||||
size = Math.max(size, ((List)o).size());
|
||||
} else if ( size == -1 )
|
||||
size = 1;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
private final void addGenericGenotypeField(final VariantContext vc, final String field) throws IOException {
|
||||
final int numInFormatField = getNGenotypeFieldValues(field, vc);
|
||||
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(field, null);
|
||||
|
||||
startGenotypeField(field, numInFormatField, encoding.BCF2Type);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
try {
|
||||
final Object fieldValue = g.getAttribute(field);
|
||||
|
||||
if ( numInFormatField == 1 ) {
|
||||
// we encode the actual allele, encodeRawValue handles the missing case where fieldValue == null
|
||||
encoder.encodeRawValue(fieldValue, encoding.BCF2Type);
|
||||
} else {
|
||||
// multiple values, need to handle general case
|
||||
final List<Object> asList = toList(fieldValue);
|
||||
final int nSampleValues = asList.size();
|
||||
for ( int i = 0; i < numInFormatField; i++ ) {
|
||||
encoder.encodeRawValue(i < nSampleValues ? asList.get(i) : null, encoding.BCF2Type);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("Value stored in VariantContext incompatible with VCF header type for field " + field, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
}
|
||||
|
||||
private final class VCFToBCFEncoding {
|
||||
VCFHeaderLineType vcfType;
|
||||
|
|
@ -356,40 +309,116 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
// }
|
||||
}
|
||||
|
||||
private final void addGQ(final VariantContext vc) throws IOException {
|
||||
startGenotypeField(VCFConstants.GENOTYPE_QUALITY_KEY, 1, BCF2Type.INT8);
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Genotype field encoding
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
if ( lazyData != null ) {
|
||||
logger.info("Lazy decoded data detected, writing bytes directly to stream. N = " + lazyData.bytes.length);
|
||||
return lazyData.bytes;
|
||||
} else {
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
// we have to do work to convert the VC into a BCF2 byte stream
|
||||
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
addGenotypes(vc);
|
||||
} else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
|
||||
addIntGenotypeField(field, intGenotypeFieldAccessors.getAccessor(field), vc);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
addGenotypeFilters(vc);
|
||||
} else {
|
||||
addGenericGenotypeField(vc, field);
|
||||
}
|
||||
}
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
}
|
||||
|
||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc, final IntGenotypeFieldAccessors.Accessor ige) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
|
||||
assert metaData != null; // field is supposed to be in header
|
||||
|
||||
int nFields = metaData.getCount(vc.getNAlleles() - 1);
|
||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc, ige);
|
||||
} else {
|
||||
return nFields;
|
||||
}
|
||||
}
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final String field, final VariantContext vc, final IntGenotypeFieldAccessors.Accessor ige) {
|
||||
int size = -1;
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
|
||||
if ( ige == null ) {
|
||||
for ( final Genotype g : gc ) {
|
||||
final Object o = g.getAttribute(field);
|
||||
if ( o == null ) continue;
|
||||
if ( o instanceof List ) {
|
||||
// only do compute if first value is of type list
|
||||
size = Math.max(size, ((List)o).size());
|
||||
} else if ( size == -1 )
|
||||
size = 1;
|
||||
}
|
||||
} else {
|
||||
for ( final Genotype g : gc ) {
|
||||
size = Math.max(size, ige.getSize(g));
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
private final void addGenericGenotypeField(final VariantContext vc, final String field) throws IOException {
|
||||
final int numInFormatField = getNGenotypeFieldValues(field, vc, null);
|
||||
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(field, null);
|
||||
|
||||
startGenotypeField(field, numInFormatField, encoding.BCF2Type);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
if ( g.hasLog10PError() ) {
|
||||
final int GQ = Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL);
|
||||
if ( GQ > VCFConstants.MAX_GENOTYPE_QUAL ) throw new ReviewedStingException("Unexpectedly large GQ " + GQ + " at " + vc);
|
||||
encoder.encodeRawValue(GQ, BCF2Type.INT8);
|
||||
} else {
|
||||
encoder.encodeRawMissingValues(1, BCF2Type.INT8);
|
||||
try {
|
||||
final Object fieldValue = g.getAttribute(field);
|
||||
|
||||
if ( numInFormatField == 1 ) {
|
||||
// we encode the actual allele, encodeRawValue handles the missing case where fieldValue == null
|
||||
encoder.encodeRawValue(fieldValue, encoding.BCF2Type);
|
||||
} else {
|
||||
// multiple values, need to handle general case
|
||||
final List<Object> asList = toList(fieldValue);
|
||||
final int nSampleValues = asList.size();
|
||||
for ( int i = 0; i < numInFormatField; i++ ) {
|
||||
encoder.encodeRawValue(i < nSampleValues ? asList.get(i) : null, encoding.BCF2Type);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("Value stored in VariantContext incompatible with VCF header type for field " + field, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Horrible special case to deal with the GenotypeLikelihoods class
|
||||
* @param vc
|
||||
* @throws IOException
|
||||
*/
|
||||
private final void addPLs(final VariantContext vc) throws IOException {
|
||||
final String field = VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY;
|
||||
final int numPLs = getNGenotypeFieldValues(field, vc);
|
||||
private final void addIntGenotypeField(final String field,
|
||||
final IntGenotypeFieldAccessors.Accessor ige,
|
||||
final VariantContext vc) throws IOException {
|
||||
final int numPLs = getNGenotypeFieldValues(field, vc, ige);
|
||||
final int[] allPLs = new int[numPLs * vc.getNSamples()];
|
||||
|
||||
// collect all of the PLs into a single vector of values
|
||||
int i = 0;
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
final GenotypeLikelihoods gls = g.getLikelihoods();
|
||||
final int[] pls = gls != null ? g.getLikelihoods().getAsPLs() : null;
|
||||
final int[] pls = ige.getValues(g);
|
||||
|
||||
if ( pls == null )
|
||||
for ( int j = 0; j < numPLs; j++) allPLs[i++] = -1;
|
||||
else
|
||||
else {
|
||||
assert pls.length == numPLs;
|
||||
for ( int pl : pls ) allPLs[i++] = pl;
|
||||
}
|
||||
}
|
||||
|
||||
// determine the best size
|
||||
|
|
@ -427,6 +456,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Low-level block encoding
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Write the data in the encoder to the outputstream as a length encoded
|
||||
* block of data. After this call the encoder stream will be ready to
|
||||
|
|
@ -488,4 +523,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
encodeStringByRef(key);
|
||||
encoder.encodeType(size, valueType);
|
||||
}
|
||||
|
||||
private final static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* A convenient way to provide a single view on the many int and int[] field values we work with,
|
||||
* for writing out the values. This class makes writing out the inline AD, GQ, PL, DP fields
|
||||
* easy and fast
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
*/
|
||||
class IntGenotypeFieldAccessors {
|
||||
// initialized once per writer to allow parallel writers to work
|
||||
private final HashMap<String, Accessor> intGenotypeFieldEncoders =
|
||||
new HashMap<String, Accessor>();
|
||||
|
||||
public IntGenotypeFieldAccessors() {
|
||||
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new IntGenotypeFieldAccessors.ADAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new IntGenotypeFieldAccessors.PLAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_QUALITY_KEY, new IntGenotypeFieldAccessors.GQAccessor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an accessor for field, or null if none exists
|
||||
* @param field
|
||||
* @return
|
||||
*/
|
||||
public Accessor getAccessor(final String field) {
|
||||
return intGenotypeFieldEncoders.get(field);
|
||||
}
|
||||
|
||||
public static abstract class Accessor {
|
||||
public abstract int[] getValues(final Genotype g);
|
||||
|
||||
public int getSize(final Genotype g) {
|
||||
final int[] v = getValues(g);
|
||||
return v == null ? 0 : v.length;
|
||||
}
|
||||
}
|
||||
|
||||
private static abstract class AtomicAccessor extends Accessor {
|
||||
private final int[] singleton = new int[1];
|
||||
|
||||
@Override
|
||||
public int[] getValues(final Genotype g) {
|
||||
singleton[0] = getValue(g);
|
||||
return singleton[0] == -1 ? null : singleton;
|
||||
}
|
||||
|
||||
public abstract int getValue(final Genotype g);
|
||||
}
|
||||
|
||||
public static class GQAccessor extends AtomicAccessor {
|
||||
@Override public int getValue(final Genotype g) { return Math.min(g.getGQ(), VCFConstants.MAX_GENOTYPE_QUAL); }
|
||||
}
|
||||
|
||||
public static class DPAccessor extends AtomicAccessor {
|
||||
@Override public int getValue(final Genotype g) { return g.getDP(); }
|
||||
}
|
||||
|
||||
public static class ADAccessor extends Accessor {
|
||||
@Override public int[] getValues(final Genotype g) { return g.getAD(); }
|
||||
}
|
||||
|
||||
public static class PLAccessor extends Accessor {
|
||||
@Override public int[] getValues(final Genotype g) { return g.getPL(); }
|
||||
}
|
||||
}
|
||||
|
|
@ -53,28 +53,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
|
||||
// /**
|
||||
// * create a VCF writer, given a file to write to
|
||||
// *
|
||||
// * @param location the file location to write to
|
||||
// */
|
||||
// public StandardVCFWriter(final File location, final SAMSequenceDictionary refDict) {
|
||||
// this(location, openOutputStream(location), refDict, true, false);
|
||||
// }
|
||||
//
|
||||
// public StandardVCFWriter(File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing) {
|
||||
// this(location, openOutputStream(location), refDict, enableOnTheFlyIndexing, false);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * create a VCF writer, given a stream to write to
|
||||
// *
|
||||
// * @param output the file location to write to
|
||||
// * @param doNotWriteGenotypes do not write genotypes
|
||||
// */
|
||||
// public StandardVCFWriter(final OutputStream output, final SAMSequenceDictionary refDict, final boolean doNotWriteGenotypes) {
|
||||
// this(null, output, refDict, false, doNotWriteGenotypes);
|
||||
// }
|
||||
private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
||||
public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
|
||||
|
|
@ -250,7 +229,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
|
||||
// FORMAT
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
if ( gc instanceof LazyGenotypesContext && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() != null) {
|
||||
if ( gc.isLazyWithData() && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() instanceof String ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
mWriter.write(((LazyGenotypesContext)gc).getUnparsedGenotypeData().toString());
|
||||
} else {
|
||||
|
|
@ -360,9 +339,9 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String key : genotypeFormatKeys ) {
|
||||
for ( String field : genotypeFormatKeys ) {
|
||||
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
if ( !g.isAvailable() ) {
|
||||
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
|
||||
}
|
||||
|
|
@ -376,36 +355,50 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
continue;
|
||||
}
|
||||
|
||||
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
// some exceptions
|
||||
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( ! g.hasLog10PError() )
|
||||
val = VCFConstants.MISSING_VALUE_v4;
|
||||
String outputValue;
|
||||
final IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.getAccessor(field);
|
||||
if ( accessor != null ) {
|
||||
final int[] intValues = accessor.getValues(g);
|
||||
if ( intValues == null )
|
||||
outputValue = VCFConstants.MISSING_VALUE_v4;
|
||||
else if ( intValues.length == 1 ) // fast path
|
||||
outputValue = Integer.toString(intValues[0]);
|
||||
else {
|
||||
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
|
||||
}
|
||||
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(intValues[0]);
|
||||
for ( int i = 1; i < intValues.length; i++) {
|
||||
sb.append(",");
|
||||
sb.append(VCFConstants.MISSING_VALUE_v4);
|
||||
sb.append(intValues[i]);
|
||||
}
|
||||
val = sb.toString();
|
||||
outputValue = sb.toString();
|
||||
}
|
||||
} else {
|
||||
Object val = g.hasAttribute(field) ? g.getAttribute(field) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
// some exceptions
|
||||
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||
sb.append(",");
|
||||
sb.append(VCFConstants.MISSING_VALUE_v4);
|
||||
}
|
||||
val = sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// assume that if key is absent, then the given string encoding suffices
|
||||
outputValue = formatVCFField(val);
|
||||
}
|
||||
|
||||
// assume that if key is absent, then the given string encoding suffices
|
||||
String outputValue = formatVCFField(val);
|
||||
if ( outputValue != null )
|
||||
attrs.add(outputValue);
|
||||
}
|
||||
|
|
@ -475,21 +468,24 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
boolean sawGoodGT = false;
|
||||
boolean sawGoodQual = false;
|
||||
boolean sawGenotypeFilter = false;
|
||||
boolean sawDP = false;
|
||||
boolean sawAD = false;
|
||||
boolean sawPL = false;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.isAvailable() )
|
||||
sawGoodGT = true;
|
||||
if ( g.hasLog10PError() )
|
||||
sawGoodQual = true;
|
||||
if (g.isFiltered() && g.isCalled())
|
||||
sawGenotypeFilter = true;
|
||||
keys.addAll(g.getExtendedAttributes().keySet());
|
||||
if ( g.isAvailable() ) sawGoodGT = true;
|
||||
if ( g.hasGQ() ) sawGoodQual = true;
|
||||
if ( g.hasDP() ) sawDP = true;
|
||||
if ( g.hasAD() ) sawAD = true;
|
||||
if ( g.hasPL() ) sawPL = true;
|
||||
if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true;
|
||||
}
|
||||
|
||||
if ( sawGoodQual )
|
||||
keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
|
||||
if (sawGenotypeFilter)
|
||||
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
if ( sawDP ) keys.add(VCFConstants.DEPTH_KEY);
|
||||
if ( sawAD ) keys.add(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
|
||||
if ( sawPL ) keys.add(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
if ( sawGenotypeFilter ) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
||||
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
|
|
@ -50,7 +51,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private static Genotype createGenotype(String name, double[] gls) {
|
||||
return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls);
|
||||
return new GenotypeBuilder(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(gls).make();
|
||||
}
|
||||
|
||||
@DataProvider(name = "getGLs")
|
||||
|
|
|
|||
|
|
@ -10,128 +10,128 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s --no_cmdline_in_header" + args;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexSelection() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSampleExclusion() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("bbd7b28d1c5701e17b395d64f8b6f13d")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testSampleExclusion--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRepeatedLineSelection() {
|
||||
String testfile = testDir + "test.dup.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("77579c53dbde4e8171f3cee83b98351b")
|
||||
);
|
||||
|
||||
executeTest("testRepeatedLineSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDiscordance() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("03abdc27bfd7aa36d57bba0325b31e0d")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testDiscordance--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDiscordanceNoSampleSpecified() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("9fb54ed003234a5847c565ffb6767b95")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConcordance() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("76857b016198c3e08a2e27bbdb49f3f0")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testConcordance--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVariantTypeSelection() {
|
||||
String testFile = testDir + "complexExample1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("6c0b0c5f03d26f4a7a1438a2afc9fb6b")
|
||||
);
|
||||
|
||||
executeTest("testVariantTypeSelection--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingDbsnpName() {
|
||||
String testFile = testDir + "combine.3.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("a8a26c621018142c9cba1080cbe687a8")
|
||||
);
|
||||
|
||||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRegenotype() {
|
||||
String testFile = testDir + "combine.3.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("6bee6dc2316aa539560a6d9d8adbc4ff")
|
||||
);
|
||||
|
||||
executeTest("testRegenotype--" + testFile, spec);
|
||||
}
|
||||
// @Test
|
||||
// public void testComplexSelection() {
|
||||
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
// 1,
|
||||
// Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
// );
|
||||
// spec.disableShadowBCF();
|
||||
// executeTest("testComplexSelection--" + testfile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testSampleExclusion() {
|
||||
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
|
||||
// 1,
|
||||
// Arrays.asList("bbd7b28d1c5701e17b395d64f8b6f13d")
|
||||
// );
|
||||
// spec.disableShadowBCF();
|
||||
//
|
||||
// executeTest("testSampleExclusion--" + testfile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testRepeatedLineSelection() {
|
||||
// String testfile = testDir + "test.dup.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// baseTestString(" -sn A -sn B -sn C --variant " + testfile),
|
||||
// 1,
|
||||
// Arrays.asList("77579c53dbde4e8171f3cee83b98351b")
|
||||
// );
|
||||
//
|
||||
// executeTest("testRepeatedLineSelection--" + testfile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testDiscordance() {
|
||||
// String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
// 1,
|
||||
// Arrays.asList("03abdc27bfd7aa36d57bba0325b31e0d")
|
||||
// );
|
||||
// spec.disableShadowBCF();
|
||||
//
|
||||
// executeTest("testDiscordance--" + testFile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testDiscordanceNoSampleSpecified() {
|
||||
// String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
// 1,
|
||||
// Arrays.asList("9fb54ed003234a5847c565ffb6767b95")
|
||||
// );
|
||||
// spec.disableShadowBCF();
|
||||
//
|
||||
// executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testConcordance() {
|
||||
// String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
// 1,
|
||||
// Arrays.asList("76857b016198c3e08a2e27bbdb49f3f0")
|
||||
// );
|
||||
// spec.disableShadowBCF();
|
||||
//
|
||||
// executeTest("testConcordance--" + testFile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testVariantTypeSelection() {
|
||||
// String testFile = testDir + "complexExample1.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
// 1,
|
||||
// Arrays.asList("6c0b0c5f03d26f4a7a1438a2afc9fb6b")
|
||||
// );
|
||||
//
|
||||
// executeTest("testVariantTypeSelection--" + testFile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testUsingDbsnpName() {
|
||||
// String testFile = testDir + "combine.3.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
|
||||
// 1,
|
||||
// Arrays.asList("a8a26c621018142c9cba1080cbe687a8")
|
||||
// );
|
||||
//
|
||||
// executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testRegenotype() {
|
||||
// String testFile = testDir + "combine.3.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
// 1,
|
||||
// Arrays.asList("6bee6dc2316aa539560a6d9d8adbc4ff")
|
||||
// );
|
||||
//
|
||||
// executeTest("testRegenotype--" + testFile, spec);
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void testMultipleRecordsAtOnePosition() {
|
||||
|
|
@ -146,58 +146,58 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoGTs() {
|
||||
String testFile = testDir + "vcf4.1.example.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("95c4d43b11c3d0dd3ab19941c474269b")
|
||||
);
|
||||
|
||||
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParallelization2() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
WalkerTestSpec spec;
|
||||
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
1,
|
||||
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParallelization4() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
WalkerTestSpec spec;
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
1,
|
||||
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testParallelization (4 threads)--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSelectFromMultiAllelic() {
|
||||
String testfile = testDir + "multi-allelic.bi-allelicInGIH.vcf";
|
||||
String samplesFile = testDir + "GIH.samples.list";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("fa92b3b41f1c04f685be8de32afc9706")
|
||||
);
|
||||
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
|
||||
}
|
||||
// @Test
|
||||
// public void testNoGTs() {
|
||||
// String testFile = testDir + "vcf4.1.example.vcf";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
// 1,
|
||||
// Arrays.asList("95c4d43b11c3d0dd3ab19941c474269b")
|
||||
// );
|
||||
//
|
||||
// executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testParallelization2() {
|
||||
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
// WalkerTestSpec spec;
|
||||
//
|
||||
// spec = new WalkerTestSpec(
|
||||
// baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
// 1,
|
||||
// Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
// );
|
||||
// spec.disableShadowBCF();
|
||||
// executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testParallelization4() {
|
||||
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
// WalkerTestSpec spec;
|
||||
// spec = new WalkerTestSpec(
|
||||
// baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
// 1,
|
||||
// Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
// );
|
||||
// spec.disableShadowBCF();
|
||||
//
|
||||
// executeTest("testParallelization (4 threads)--" + testfile, spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testSelectFromMultiAllelic() {
|
||||
// String testfile = testDir + "multi-allelic.bi-allelicInGIH.vcf";
|
||||
// String samplesFile = testDir + "GIH.samples.list";
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(
|
||||
// "-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
|
||||
// 1,
|
||||
// Arrays.asList("fa92b3b41f1c04f685be8de32afc9706")
|
||||
// );
|
||||
// executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
|
||||
// }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,12 +120,8 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
|
||||
attributes.put("DP","50");
|
||||
for (String name : header.getGenotypeSamples()) {
|
||||
Map<String, Object> gtattributes = new HashMap<String,Object>();
|
||||
gtattributes.put("BB","1");
|
||||
Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true);
|
||||
|
||||
Genotype gt = new GenotypeBuilder(name,alleles.subList(1,2)).GQ(0).attribute("BB", "1").phased(true).make();
|
||||
genotypes.add(gt);
|
||||
|
||||
}
|
||||
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();
|
||||
|
|
|
|||
|
|
@ -76,17 +76,17 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
public void testGetAsMap(){
|
||||
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(v);
|
||||
//Log scale
|
||||
EnumMap<Genotype.Type,Double> glMap = gl.getAsMap(false);
|
||||
Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
|
||||
Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
|
||||
Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
|
||||
EnumMap<GenotypeType,Double> glMap = gl.getAsMap(false);
|
||||
Assert.assertEquals(v[GenotypeType.HOM_REF.ordinal()-1],glMap.get(GenotypeType.HOM_REF));
|
||||
Assert.assertEquals(v[GenotypeType.HET.ordinal()-1],glMap.get(GenotypeType.HET));
|
||||
Assert.assertEquals(v[GenotypeType.HOM_VAR.ordinal()-1],glMap.get(GenotypeType.HOM_VAR));
|
||||
|
||||
//Linear scale
|
||||
glMap = gl.getAsMap(true);
|
||||
double [] vl = MathUtils.normalizeFromLog10(v);
|
||||
Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
|
||||
Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
|
||||
Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
|
||||
Assert.assertEquals(vl[GenotypeType.HOM_REF.ordinal()-1],glMap.get(GenotypeType.HOM_REF));
|
||||
Assert.assertEquals(vl[GenotypeType.HET.ordinal()-1],glMap.get(GenotypeType.HET));
|
||||
Assert.assertEquals(vl[GenotypeType.HOM_VAR.ordinal()-1],glMap.get(GenotypeType.HOM_VAR));
|
||||
|
||||
//Test missing likelihoods
|
||||
gl = GenotypeLikelihoods.fromPLField(".");
|
||||
|
|
@ -111,19 +111,19 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
GenotypeLikelihoods gl = GenotypeLikelihoods.fromPLField(vPLString);
|
||||
|
||||
//GQ for the best guess genotype
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HET),-3.9);
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HET),-3.9);
|
||||
|
||||
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
|
||||
|
||||
//GQ for the other genotypes
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF), Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR), Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_REF), Math.log10(1.0 - test[GenotypeType.HOM_REF.ordinal()-1]));
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_VAR), Math.log10(1.0 - test[GenotypeType.HOM_VAR.ordinal()-1]));
|
||||
|
||||
//Test missing likelihoods
|
||||
gl = GenotypeLikelihoods.fromPLField(".");
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_REF),Double.NEGATIVE_INFINITY);
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HET),Double.NEGATIVE_INFINITY);
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_VAR),Double.NEGATIVE_INFINITY);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -134,7 +134,7 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
double[] expectedQuals = new double[]{-0.04100161, -1, -0.003930294};
|
||||
|
||||
for ( int i = 0; i < likelihoods.length; i++ ) {
|
||||
Assert.assertEquals(GenotypeLikelihoods.getQualFromLikelihoods(i, likelihoods), expectedQuals[i], 1e-6,
|
||||
Assert.assertEquals(GenotypeLikelihoods.getGQLog10FromLikelihoods(i, likelihoods), expectedQuals[i], 1e-6,
|
||||
"GQ value for genotype " + i + " was not calculated correctly");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,13 +51,13 @@ public class GenotypesContextUnitTest extends BaseTest {
|
|||
C = Allele.create("C");
|
||||
Aref = Allele.create("A", true);
|
||||
T = Allele.create("T");
|
||||
AA = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
AT = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
TT = new Genotype("TT", Arrays.asList(T, T));
|
||||
AC = new Genotype("AC", Arrays.asList(Aref, C));
|
||||
CT = new Genotype("CT", Arrays.asList(C, T));
|
||||
CC = new Genotype("CC", Arrays.asList(C, C));
|
||||
MISSING = new Genotype("MISSING", Arrays.asList(C, C));
|
||||
AA = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
AT = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
TT = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
AC = GenotypeBuilder.create("AC", Arrays.asList(Aref, C));
|
||||
CT = GenotypeBuilder.create("CT", Arrays.asList(C, T));
|
||||
CC = GenotypeBuilder.create("CC", Arrays.asList(C, C));
|
||||
MISSING = GenotypeBuilder.create("MISSING", Arrays.asList(C, C));
|
||||
|
||||
allGenotypes = Arrays.asList(AA, AT, TT, AC, CT, CC);
|
||||
}
|
||||
|
|
@ -223,8 +223,8 @@ public class GenotypesContextUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true, dataProvider = "GenotypesContextProvider")
|
||||
public void testAdds(GenotypesContextProvider cfg) {
|
||||
Genotype add1 = new Genotype("add1", Arrays.asList(Aref, Aref));
|
||||
Genotype add2 = new Genotype("add2", Arrays.asList(Aref, Aref));
|
||||
Genotype add1 = GenotypeBuilder.create("add1", Arrays.asList(Aref, Aref));
|
||||
Genotype add2 = GenotypeBuilder.create("add2", Arrays.asList(Aref, Aref));
|
||||
|
||||
GenotypesContext gc = cfg.makeContext();
|
||||
gc.add(add1);
|
||||
|
|
@ -279,7 +279,7 @@ public class GenotypesContextUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true, dataProvider = "GenotypesContextProvider")
|
||||
public void testSet(GenotypesContextProvider cfg) {
|
||||
Genotype set = new Genotype("replace", Arrays.asList(Aref, Aref));
|
||||
Genotype set = GenotypeBuilder.create("replace", Arrays.asList(Aref, Aref));
|
||||
int n = cfg.makeContext().size();
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
GenotypesContext gc = cfg.makeContext();
|
||||
|
|
@ -297,7 +297,7 @@ public class GenotypesContextUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < n; i++ ) {
|
||||
GenotypesContext gc = cfg.makeContext();
|
||||
Genotype toReplace = gc.get(i);
|
||||
Genotype replacement = new Genotype(toReplace.getSampleName(), Arrays.asList(Aref, Aref));
|
||||
Genotype replacement = GenotypeBuilder.create(toReplace.getSampleName(), Arrays.asList(Aref, Aref));
|
||||
gc.replace(replacement);
|
||||
ArrayList<Genotype> l = new ArrayList<Genotype>(cfg.initialSamples);
|
||||
l.set(i, replacement);
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
public void run(final VariantContext vc) {
|
||||
if ( samples == null )
|
||||
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
VariantContext sub = vc.subContextFromSamples(samples);
|
||||
VariantContext sub = vc.subContextFromSamples(samples, true);
|
||||
sub.getNSamples();
|
||||
}
|
||||
};
|
||||
|
|
@ -230,7 +230,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
for ( int i = 0; i < dupsToMerge; i++ ) {
|
||||
GenotypesContext gc = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
gc.add(new Genotype(g.getSampleName()+"_"+i, g));
|
||||
gc.add(new GenotypeBuilder(g).name(g.getSampleName()+"_"+i).make());
|
||||
}
|
||||
toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -273,7 +273,7 @@ public class VariantContextTestProvider {
|
|||
for ( int i = 0; i < genotypes.length - 1; i++ )
|
||||
gc.add(genotypes[i]);
|
||||
for ( int i = 0; i < nCopiesOfLast; i++ )
|
||||
gc.add(new Genotype("copy" + i, last));
|
||||
gc.add(new GenotypeBuilder(last).name("copy" + i).make());
|
||||
add(builder.genotypes(gc));
|
||||
}
|
||||
}
|
||||
|
|
@ -286,12 +286,12 @@ public class VariantContextTestProvider {
|
|||
// test ref/ref
|
||||
final Allele ref = site.getReference();
|
||||
final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null;
|
||||
final Genotype homRef = new Genotype("homRef", Arrays.asList(ref, ref));
|
||||
final Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(ref, ref));
|
||||
addGenotypeTests(site, homRef);
|
||||
|
||||
if ( alt1 != null ) {
|
||||
final Genotype het = new Genotype("het", Arrays.asList(ref, alt1));
|
||||
final Genotype homVar = new Genotype("homVar", Arrays.asList(alt1, alt1));
|
||||
final Genotype het = GenotypeBuilder.create("het", Arrays.asList(ref, alt1));
|
||||
final Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(alt1, alt1));
|
||||
addGenotypeTests(site, homRef, het);
|
||||
addGenotypeTests(site, homRef, het, homVar);
|
||||
final List<Allele> noCall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
|
@ -299,17 +299,17 @@ public class VariantContextTestProvider {
|
|||
// ploidy
|
||||
if ( ENABLE_PLOIDY_TESTS ) {
|
||||
addGenotypeTests(site,
|
||||
new Genotype("dip", Arrays.asList(ref, alt1)),
|
||||
new Genotype("hap", Arrays.asList(ref)));
|
||||
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
|
||||
GenotypeBuilder.create("hap", Arrays.asList(ref)));
|
||||
|
||||
addGenotypeTests(site,
|
||||
new Genotype("dip", Arrays.asList(ref, alt1)),
|
||||
new Genotype("tet", Arrays.asList(ref, alt1, alt1)));
|
||||
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
|
||||
GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
|
||||
|
||||
addGenotypeTests(site,
|
||||
new Genotype("nocall", noCall),
|
||||
new Genotype("dip", Arrays.asList(ref, alt1)),
|
||||
new Genotype("tet", Arrays.asList(ref, alt1, alt1)));
|
||||
GenotypeBuilder.create("nocall", noCall),
|
||||
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
|
||||
GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -317,26 +317,26 @@ public class VariantContextTestProvider {
|
|||
if ( site.getNAlleles() == 2 ) {
|
||||
// testing PLs
|
||||
addGenotypeTests(site,
|
||||
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2}),
|
||||
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3}));
|
||||
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{0, -1, -2}),
|
||||
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2, -3}));
|
||||
|
||||
addGenotypeTests(site,
|
||||
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
|
||||
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3}));
|
||||
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{-1, 0, -2}),
|
||||
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2, -3}));
|
||||
|
||||
addGenotypeTests(site,
|
||||
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
|
||||
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2000, -1000}));
|
||||
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{-1, 0, -2}),
|
||||
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2000, -1000}));
|
||||
|
||||
addGenotypeTests(site, // missing PLs
|
||||
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
|
||||
new Genotype("g2", Arrays.asList(ref, ref), -1));
|
||||
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{-1, 0, -2}),
|
||||
GenotypeBuilder.create("g2", Arrays.asList(ref, ref)));
|
||||
}
|
||||
else if ( site.getNAlleles() == 3 ) {
|
||||
// testing PLs
|
||||
addGenotypeTests(site,
|
||||
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2, -3, -4, -5}),
|
||||
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3, -4, -5, -6}));
|
||||
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{0, -1, -2, -3, -4, -5}),
|
||||
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2, -3, -4, -5, -6}));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -383,11 +383,10 @@ public class VariantContextTestProvider {
|
|||
|
||||
private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) {
|
||||
if ( value.length == 0 )
|
||||
return new Genotype(name, Arrays.asList(ref, ref), -1);
|
||||
return GenotypeBuilder.create(name, Arrays.asList(ref, ref));
|
||||
else {
|
||||
final Object toAdd = value.length == 1 ? value[0] : Arrays.asList(value);
|
||||
Map<String, Object> attr = Collections.singletonMap(key, toAdd);
|
||||
return new Genotype(name, Arrays.asList(ref, ref), -1, null, attr, false);
|
||||
return new GenotypeBuilder(name, Arrays.asList(ref, ref)).attribute(key, toAdd).make();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -488,7 +487,7 @@ public class VariantContextTestProvider {
|
|||
Assert.assertEquals(actual.getGenotypeString(), expected.getGenotypeString());
|
||||
Assert.assertEquals(actual.getFilters(), expected.getFilters());
|
||||
Assert.assertEquals(actual.getPhredScaledQual(), expected.getPhredScaledQual());
|
||||
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
|
||||
assertAttributesEquals(actual.getExtendedAttributes(), expected.getExtendedAttributes());
|
||||
Assert.assertEquals(actual.isPhased(), expected.isPhased());
|
||||
Assert.assertEquals(actual.getPloidy(), expected.getPloidy());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -276,7 +276,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testCreatingPartiallyCalledGenotype() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, C);
|
||||
Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL));
|
||||
Genotype g = GenotypeBuilder.create("foo", Arrays.asList(C, Allele.NO_CALL));
|
||||
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g).make();
|
||||
|
||||
Assert.assertTrue(vc.isSNP());
|
||||
|
|
@ -293,7 +293,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Assert.assertFalse(vc.getGenotype("foo").isNoCall());
|
||||
Assert.assertFalse(vc.getGenotype("foo").isHom());
|
||||
Assert.assertTrue(vc.getGenotype("foo").isMixed());
|
||||
Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED);
|
||||
Assert.assertEquals(vc.getGenotype("foo").getType(), GenotypeType.MIXED);
|
||||
}
|
||||
|
||||
@Test (expectedExceptions = Exception.class)
|
||||
|
|
@ -351,9 +351,9 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
public void testAccessingSimpleSNPGenotypes() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
|
||||
.genotypes(g1, g2, g3).make();
|
||||
|
|
@ -388,12 +388,12 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
public void testAccessingCompleteGenotypes() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
||||
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = new Genotype("Td", Arrays.asList(T, del));
|
||||
Genotype g5 = new Genotype("dd", Arrays.asList(del, del));
|
||||
Genotype g6 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del));
|
||||
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del));
|
||||
Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
|
||||
.genotypes(g1, g2, g3, g4, g5, g6).make();
|
||||
|
|
@ -418,9 +418,9 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
List<Allele> alleles2 = Arrays.asList(Aref);
|
||||
List<Allele> alleles3 = Arrays.asList(Aref, T, del);
|
||||
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
|
||||
Genotype g1 = new Genotype("AA1", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AA2", Arrays.asList(Aref, Aref));
|
||||
Genotype g3 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
|
||||
Genotype g3 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
|
||||
.genotypes(g1, g2, g3).make();
|
||||
|
||||
|
|
@ -439,8 +439,8 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testFilters() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1, g2).make();
|
||||
|
||||
|
|
@ -543,11 +543,11 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testGetGenotypeCounts() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = new Genotype("A.", Arrays.asList(Aref, Allele.NO_CALL));
|
||||
Genotype g5 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = GenotypeBuilder.create("A.", Arrays.asList(Aref, Allele.NO_CALL));
|
||||
Genotype g5 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
|
||||
// we need to create a new VariantContext each time
|
||||
VariantContext vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
|
|
@ -565,19 +565,19 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testVCFfromGenotypes() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
Genotype g5 = new Genotype("--", Arrays.asList(del, del));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del));
|
||||
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
|
||||
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())));
|
||||
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())));
|
||||
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())));
|
||||
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())));
|
||||
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())));
|
||||
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())));
|
||||
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
|
||||
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
|
||||
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
|
||||
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
|
||||
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
|
||||
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())), true);
|
||||
|
||||
Assert.assertTrue(vc12.isPolymorphicInSamples());
|
||||
Assert.assertTrue(vc23.isPolymorphicInSamples());
|
||||
|
|
@ -620,9 +620,9 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public void testGetGenotypeMethods() {
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
GenotypesContext gc = GenotypesContext.create(g1, g2, g3);
|
||||
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();
|
||||
|
||||
|
|
@ -725,9 +725,9 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "SitesAndGenotypesVC")
|
||||
public Object[][] MakeSitesAndGenotypesVCs() {
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
|
||||
VariantContext sites = new VariantContextBuilder("sites", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).make();
|
||||
VariantContext genotypes = new VariantContextBuilder(sites).source("genotypes").genotypes(g1, g2, g3).make();
|
||||
|
|
@ -764,9 +764,9 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
modified = new VariantContextBuilder(modified).attributes(null).make();
|
||||
Assert.assertTrue(modified.getAttributes().isEmpty());
|
||||
|
||||
Genotype g1 = new Genotype("AA2", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT2", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT2", Arrays.asList(T, T));
|
||||
Genotype g1 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT2", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT2", Arrays.asList(T, T));
|
||||
GenotypesContext gc = GenotypesContext.create(g1,g2,g3);
|
||||
modified = new VariantContextBuilder(cfg.vc).genotypes(gc).make();
|
||||
Assert.assertEquals(modified.getGenotypes(), gc);
|
||||
|
|
@ -824,13 +824,13 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "SubContextTest")
|
||||
public void runSubContextTest(SubContextTest cfg) {
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
|
||||
GenotypesContext gc = GenotypesContext.create(g1, g2, g3);
|
||||
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();
|
||||
VariantContext sub = cfg.updateAlleles ? vc.subContextFromSamples(cfg.samples) : vc.subContextFromSamples(cfg.samples, vc.getAlleles());
|
||||
VariantContext sub = vc.subContextFromSamples(cfg.samples, cfg.updateAlleles);
|
||||
|
||||
// unchanged attributes should be the same
|
||||
Assert.assertEquals(sub.getChr(), vc.getChr());
|
||||
|
|
@ -911,7 +911,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
public void runSampleNamesTest(SampleNamesTest cfg) {
|
||||
GenotypesContext gc = GenotypesContext.create(cfg.sampleNames.size());
|
||||
for ( final String name : cfg.sampleNames ) {
|
||||
gc.add(new Genotype(name, Arrays.asList(Aref, T)));
|
||||
gc.add(GenotypeBuilder.create(name, Arrays.asList(Aref, T)));
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();
|
||||
|
|
@ -926,11 +926,11 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testGenotypeCounting() {
|
||||
Genotype noCall = new Genotype("nocall", Arrays.asList(Allele.NO_CALL));
|
||||
Genotype mixed = new Genotype("mixed", Arrays.asList(Aref, Allele.NO_CALL));
|
||||
Genotype homRef = new Genotype("homRef", Arrays.asList(Aref, Aref));
|
||||
Genotype het = new Genotype("het", Arrays.asList(Aref, T));
|
||||
Genotype homVar = new Genotype("homVar", Arrays.asList(T, T));
|
||||
Genotype noCall = GenotypeBuilder.create("nocall", Arrays.asList(Allele.NO_CALL));
|
||||
Genotype mixed = GenotypeBuilder.create("mixed", Arrays.asList(Aref, Allele.NO_CALL));
|
||||
Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(Aref, Aref));
|
||||
Genotype het = GenotypeBuilder.create("het", Arrays.asList(Aref, T));
|
||||
Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(T, T));
|
||||
|
||||
List<Genotype> allGenotypes = Arrays.asList(noCall, mixed, homRef, het, homVar);
|
||||
final int nCycles = allGenotypes.size() * 10;
|
||||
|
|
@ -943,7 +943,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
nSamples++;
|
||||
Genotype g = allGenotypes.get(j % allGenotypes.size());
|
||||
final String name = String.format("%s_%d%d", g.getSampleName(), i, j);
|
||||
gc.add(new Genotype(name, g.getAlleles()));
|
||||
gc.add(GenotypeBuilder.create(name, g.getAlleles()));
|
||||
switch ( g.getType() ) {
|
||||
case NO_CALL: nNoCall++; nNoCallAlleles++; break;
|
||||
case HOM_REF: nA += 2; nHomRef++; break;
|
||||
|
|
|
|||
|
|
@ -64,16 +64,16 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private Genotype makeG(String sample, Allele a1, Allele a2) {
|
||||
return new Genotype(sample, Arrays.asList(a1, a2));
|
||||
return GenotypeBuilder.create(sample, Arrays.asList(a1, a2));
|
||||
}
|
||||
|
||||
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double... pls) {
|
||||
return new Genotype(sample, Arrays.asList(a1, a2), log10pError, pls);
|
||||
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).PL(pls).make();
|
||||
}
|
||||
|
||||
|
||||
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
|
||||
return new Genotype(sample, Arrays.asList(a1, a2), log10pError);
|
||||
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).make();
|
||||
}
|
||||
|
||||
private VariantContext makeVC(String source, List<Allele> alleles) {
|
||||
|
|
@ -523,7 +523,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
for (Genotype value : actual) {
|
||||
Genotype expectedValue = expected.get(value.getSampleName());
|
||||
|
||||
Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal");
|
||||
Assert.assertEquals(value.getAlleles(), expectedValue.getAlleles(), "Alleles in Genotype aren't equal");
|
||||
Assert.assertEquals(value.getLog10PError(), expectedValue.getLog10PError(), "GQ values aren't equal");
|
||||
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not");
|
||||
if ( value.hasLikelihoods() )
|
||||
|
|
|
|||
Loading…
Reference in New Issue