Efficient Genotype object Intermediate commit

-- Created a new Genotype interface with a more limited set of operations
-- Old genotype object is now SlowGenotype.  New genotype object is FastGenotype.  They can be used interchangable
-- There's no way to create Genotypes directly any longer.  You have to use GenotypeBuilder just like VariantContextBuilder
-- Modified lots and lots of code to use GenotypeBuilder
-- Added a temporary hidden argument to engine to use FastGenotype by default.  Current default is SlowGenotype
-- Lots of bug fixes to BCF2 codec and encoder.
-- Feature additions
  -- Now properly handles BCF2 -> BCF2 without decoding or encoding from scratch the BCF2 genotype bytes
  -- Cleaned up semantics of subContextFromSamples.  There's one function that either rederives or not the alleles from the subsetted genotypes

-- MASSIVE BUGFIX in SelectVariants.  The code has been decoding genotypes always, even if you were not subsetting down samples.  Fixed!
This commit is contained in:
Mark DePristo 2012-06-01 19:25:11 -04:00
parent a648b5e65e
commit d37a8a0bc8
54 changed files with 1831 additions and 1277 deletions

View File

@ -59,6 +59,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
import java.io.File;
import java.io.FileInputStream;
@ -221,6 +222,10 @@ public class GenomeAnalysisEngine {
if (this.getArguments().nonDeterministicRandomSeed)
resetRandomGenerator(System.currentTimeMillis());
// TODO -- REMOVE ME WHEN WE STOP BCF testing
if ( this.getArguments().USE_FAST_GENOTYPES )
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = true;
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
if (this.getArguments().BQSR_RECAL_FILE != null)
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels);

View File

@ -336,6 +336,11 @@ public class GATKArgumentCollection {
public boolean generateShadowBCF = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
@Argument(fullName="useFastGenotypes",shortName = "useFastGenotypes",doc="",required=false)
@Hidden
public boolean USE_FAST_GENOTYPES = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
/**
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other

View File

@ -251,7 +251,7 @@ public class VariantContextAdaptors {
Map<String, Object> attributes = new HashMap<String, Object>();
Collection<Genotype> genotypes = new ArrayList<Genotype>();
Genotype call = new Genotype(name, genotypeAlleles);
Genotype call = GenotypeBuilder.create(name, genotypeAlleles);
// add the call to the genotype list, and then use this list to create a VariantContext
genotypes.add(call);
@ -344,7 +344,7 @@ public class VariantContextAdaptors {
alleles.add(allele2);
}
Genotype g = new Genotype(samples[i], myAlleles);
Genotype g = GenotypeBuilder.create(samples[i], myAlleles);
genotypes.add(g);
}

View File

@ -73,7 +73,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
}
// we need to add counts in the correct order
Integer[] counts = new Integer[alleleCounts.size()];
int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
@ -124,7 +124,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
}
}
Integer[] counts = new Integer[alleleCounts.size()];
int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(REF_ALLELE);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
@ -132,8 +132,8 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
return toADAnnotation(counts);
}
private final Map<String, Object> toADAnnotation(final Integer[] counts) {
return Collections.singletonMap(getKeyNames().get(0), (Object)Arrays.asList(counts));
private final Map<String, Object> toADAnnotation(final int[] counts) {
return Collections.singletonMap(getKeyNames().get(0), (Object)counts);
}
private String getAlleleRepresentation(Allele allele) {

View File

@ -272,13 +272,13 @@ public class VariantAnnotatorEngine {
continue;
}
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getAttributes());
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getExtendedAttributes());
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
Map<String, Object> result = annotation.annotate(tracker, walker, ref, context, vc, genotype);
if ( result != null )
genotypeAnnotations.putAll(result);
}
genotypes.add(new Genotype(genotype.getSampleName(), genotype.getAlleles(), genotype.getLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
genotypes.add(new GenotypeBuilder(genotype).attributes(genotypeAnnotations).make());
}
return genotypes;

View File

@ -204,8 +204,6 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
}
for ( final Genotype g : vc_input.getGenotypes() ) {
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
boolean genotypeIsPhased = true;
String sample = g.getSampleName();
@ -271,7 +269,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
// Compute new GQ field = -10*log10Pr(Genotype call is wrong)
// Beagle gives probability that genotype is AA, AB and BB.
// Which, by definition, are prob of hom ref, het and hom var.
Double probWrongGenotype, genotypeQuality;
double probWrongGenotype, genotypeQuality;
Double homRefProbability = Double.valueOf(beagleProbabilities.get(0));
Double hetProbability = Double.valueOf(beagleProbabilities.get(1));
Double homVarProbability = Double.valueOf(beagleProbabilities.get(2));
@ -300,7 +298,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
else
genotypeQuality = log10(probWrongGenotype);
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getAttributes());
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getExtendedAttributes());
// get original encoding and add to keynotype attributes
String a1, a2, og;
@ -328,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
else {
originalAttributes.put("OG",".");
}
Genotype imputedGenotype = new Genotype(g.getSampleName(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased);
Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).GQ(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
beagleVarCounts++;
}

View File

@ -36,10 +36,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import java.util.*;
@ -260,7 +257,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
vcb.filters(statusesToStrings(stats.callableStatuses(thresholds)));
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
attributes.put(VCFConstants.END_KEY, interval.getStop());
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
@ -270,21 +267,20 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
}
for (String sample : samples) {
Map<String, Object> infos = new HashMap<String, Object>();
SampleStatistics sampleStat = stats.getSample(sample);
infos.put(VCFConstants.DEPTH_KEY, sampleStat.averageCoverage());
infos.put("Q1", sampleStat.getQuantileDepth(0.25));
infos.put("MED", sampleStat.getQuantileDepth(0.50));
infos.put("Q3", sampleStat.getQuantileDepth(0.75));
final GenotypeBuilder gb = new GenotypeBuilder(sample);
Set<String> filters = new HashSet<String>();
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
SampleStatistics sampleStat = stats.getSample(sample);
gb.DP((int)sampleStat.averageCoverage());
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
if (debug) {
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
}
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
genotypes.add(gb.make());
}
vcb = vcb.genotypes(genotypes);
@ -299,8 +295,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
* @param statuses the set of statuses to be converted
* @return a matching set of strings
*/
private Set<String> statusesToStrings(Set<CallableStatus> statuses) {
Set<String> output = new HashSet<String>(statuses.size());
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
List<String> output = new ArrayList<String>(statuses.size());
for (CallableStatus status : statuses)
output.add(status.name());

View File

@ -79,14 +79,12 @@ class SampleStatistics {
* @return the callable statuses of the entire sample
*/
public Set<CallableStatus> getCallableStatuses(ThresHolder thresholds) {
Set<CallableStatus> output = new HashSet<CallableStatus>();
// We check if reads are present ot prevent div / 0 exceptions
if (nReads == 0) {
output.add(CallableStatus.NO_READS);
return output;
return Collections.singleton(CallableStatus.NO_READS);
}
Set<CallableStatus> output = new HashSet<CallableStatus>();
Map<CallableStatus, Double> totals = new HashMap<CallableStatus, Double>(CallableStatus.values().length);
// initialize map
@ -126,6 +124,7 @@ class SampleStatistics {
if (output.isEmpty()) {
output.add(CallableStatus.PASS);
}
return output;
}

View File

@ -29,11 +29,13 @@ import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
@ -109,9 +111,12 @@ public class VCFDiffableReader implements DiffableReader {
for (Genotype g : vc.getGenotypes() ) {
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
gRoot.add("GT", g.getGenotypeString());
gRoot.add("GQ", g.hasLog10PError() ? g.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4 );
if ( g.hasGQ() ) gRoot.add("GQ", g.getGQ() );
if ( g.hasDP() ) gRoot.add("DP", g.getDP() );
if ( g.hasAD() ) gRoot.add("AD", Utils.join(",", g.getAD()));
if ( g.hasPL() ) gRoot.add("PL", Utils.join(",", g.getPL()));
for (Map.Entry<String, Object> attribute : g.getAttributes().entrySet()) {
for (Map.Entry<String, Object> attribute : g.getExtendedAttributes().entrySet()) {
if ( ! attribute.getKey().startsWith("_") )
gRoot.add(attribute.getKey(), attribute.getValue());
}

View File

@ -297,13 +297,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
// for each genotype, check filters then create a new object
for ( final Genotype g : vc.getGenotypes() ) {
if ( g.isCalled() ) {
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
List<String> filters = new ArrayList<String>(g.getFilters());
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
if ( VariantContextUtils.match(vc, g, exp) )
filters.add(exp.name);
}
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), filters, g.getAttributes(), g.isPhased()));
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
} else {
genotypes.add(g);
}

View File

@ -141,13 +141,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (context.hasBasePileup()) {
final ReadBackedPileup pileup = context.getBasePileup();
if (pileup != null) {
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
final HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
b.PL(genotypeLikelihoods);
b.DP(getFilteredDepth(pileup));
genotypes.add(b.make());
if (DEBUG) {
System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());

View File

@ -158,12 +158,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
myLikelihoods[i] = allLikelihoods[PLordering[i]];
// normalize in log space so that max element is zero.
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
final HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, sampleData.depth);
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sampleData.name, noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
final GenotypeBuilder gb = new GenotypeBuilder(sampleData.name);
final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true);
gb.PL(genotypeLikelihoods);
gb.DP(sampleData.depth);
genotypes.add(gb.make());
}
return builder.genotypes(genotypes).make();

View File

@ -1148,13 +1148,12 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
GenotypesContext genotypes = GenotypesContext.create();
for ( String sample : normalSamples ) {
Map<String,Object> attrs = call.makeStatsAttributes(null);
if ( ! discard_event ) // we made a call - put actual het genotype here:
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
final GenotypeBuilder gb = new GenotypeBuilder(sample);
gb.attributes(call.makeStatsAttributes(null));
gb.alleles(! discard_event
? alleles // we made a call - put actual het genotype here:
: homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
genotypes.add(gb.make());
}
Set<String> filters = null;
@ -1238,11 +1237,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
GenotypesContext genotypes = GenotypesContext.create();
for ( String sample : normalSamples ) {
genotypes.add(new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsNormal,false));
genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal));
}
for ( String sample : tumorSamples ) {
genotypes.add(new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsTumor,false) );
genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor));
}
Set<String> filters = null;

View File

@ -97,10 +97,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
private ArrayList<Sample> trios = new ArrayList<Sample>();
//Matrix of priors for all genotype combinations
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> mvCountMatrix;
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix;
//Matrix of allele transmission
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>> transmissionMatrix;
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>> transmissionMatrix;
//Metrics counters hash keys
private final Byte NUM_TRIO_GENOTYPES_CALLED = 0;
@ -138,17 +138,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
private EnumMap<FamilyMember,Genotype> trioPhasedGenotypes = new EnumMap<FamilyMember, Genotype>(FamilyMember.class);
private ArrayList<Allele> getAlleles(Genotype.Type genotype){
private ArrayList<Allele> getAlleles(GenotypeType genotype){
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
if(genotype == Genotype.Type.HOM_REF){
if(genotype == GenotypeType.HOM_REF){
alleles.add(REF);
alleles.add(REF);
}
else if(genotype == Genotype.Type.HET){
else if(genotype == GenotypeType.HET){
alleles.add(REF);
alleles.add(VAR);
}
else if(genotype == Genotype.Type.HOM_VAR){
else if(genotype == GenotypeType.HOM_VAR){
alleles.add(VAR);
alleles.add(VAR);
}
@ -158,27 +158,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
return alleles;
}
private boolean isPhasable(Genotype.Type genotype){
return genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HET || genotype == Genotype.Type.HOM_VAR;
private boolean isPhasable(GenotypeType genotype){
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
}
//Create a new Genotype based on information from a single individual
//Homozygous genotypes will be set as phased, heterozygous won't be
private void phaseSingleIndividualAlleles(Genotype.Type genotype, FamilyMember familyMember){
if(genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HOM_VAR){
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME, getAlleles(genotype), Genotype.NO_LOG10_PERROR, null, null, true));
}
else
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME,getAlleles(genotype),Genotype.NO_LOG10_PERROR,null,null,false));
private void phaseSingleIndividualAlleles(GenotypeType genotype, FamilyMember familyMember){
boolean phase = genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HOM_VAR;
trioPhasedGenotypes.put(familyMember, makeGenotype(genotype, phase));
}
private Genotype makeGenotype(final GenotypeType type, boolean phase) {
return makeGenotype(getAlleles(type), phase);
}
private Genotype makeGenotype(final List<Allele> alleles, boolean phase) {
final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_NAME, alleles);
gb.phased(phase);
return gb.make();
}
//Find the phase for a parent/child pair
private void phasePairAlleles(Genotype.Type parentGenotype, Genotype.Type childGenotype, FamilyMember parent){
private void phasePairAlleles(GenotypeType parentGenotype, GenotypeType childGenotype, FamilyMember parent){
//Special case for Het/Het as it is ambiguous
if(parentGenotype == Genotype.Type.HET && childGenotype == Genotype.Type.HET){
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, getAlleles(parentGenotype), Genotype.NO_LOG10_PERROR, null, null, false));
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
if(parentGenotype == GenotypeType.HET && childGenotype == GenotypeType.HET){
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
return;
}
@ -190,34 +197,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
//If there is a possible phasing between the parent and child => phase
int childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(0));
if(childTransmittedAlleleIndex > -1){
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
trioPhasedGenotypes.put(parent, makeGenotype(parentAlleles, true));
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
if(parent.equals(FamilyMember.MOTHER))
childPhasedAlleles.add(childAlleles.get(0));
else
childPhasedAlleles.add(0,childAlleles.get(0));
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
}
else if((childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(1))) > -1){
parentPhasedAlleles.add(parentAlleles.get(1));
parentPhasedAlleles.add(parentAlleles.get(0));
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
trioPhasedGenotypes.put(parent, makeGenotype(parentPhasedAlleles, true));
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
if(parent.equals(FamilyMember.MOTHER))
childPhasedAlleles.add(childAlleles.get(0));
else
childPhasedAlleles.add(0,childAlleles.get(0));
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
}
//This is a Mendelian Violation => Do not phase
else{
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME,getAlleles(parentGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
}
}
//Phases a family by transmission
private void phaseFamilyAlleles(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
private void phaseFamilyAlleles(GenotypeType mother, GenotypeType father, GenotypeType child){
Set<ArrayList<Allele>> possiblePhasedChildGenotypes = new HashSet<ArrayList<Allele>>();
ArrayList<Allele> motherAlleles = getAlleles(mother);
@ -246,7 +253,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
motherPhasedAlleles.add(motherAlleles.get(0));
else
motherPhasedAlleles.add(motherAlleles.get(1));
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,motherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(motherPhasedAlleles, true));
//Create father's genotype
ArrayList<Allele> fatherPhasedAlleles = new ArrayList<Allele>(2);
@ -255,10 +262,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
fatherPhasedAlleles.add(fatherAlleles.get(0));
else
fatherPhasedAlleles.add(fatherAlleles.get(1));
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,fatherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(fatherPhasedAlleles,true));
//Create child's genotype
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,childPhasedAllelesAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAllelesAlleles,true));
//Once a phased combination is found; exit
return;
@ -266,16 +273,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
}
//If this is reached then no phasing could be found
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,getAlleles(mother),Genotype.NO_LOG10_PERROR,null,null,false));
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,getAlleles(father),Genotype.NO_LOG10_PERROR,null,null,false));
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(child),Genotype.NO_LOG10_PERROR,null,null,false));
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(mother,false));
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(father,false));
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(child,false));
}
/* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
If one or more genotypes are set as NO_CALL or UNAVAILABLE, it will phase them like a pair
or single individual.
*/
public TrioPhase(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
public TrioPhase(GenotypeType mother, GenotypeType father, GenotypeType child){
//Take care of cases where one or more family members are no call
if(!isPhasable(child)){
@ -297,7 +304,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
}
//Special case for Het/Het/Het as it is ambiguous
else if(mother == Genotype.Type.HET && father == Genotype.Type.HET && child == Genotype.Type.HET){
else if(mother == GenotypeType.HET && father == GenotypeType.HET && child == GenotypeType.HET){
phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER);
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
phaseSingleIndividualAlleles(child, FamilyMember.CHILD);
@ -311,7 +318,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
if(fatherFAlleleFirst && trioPhasedGenotypes.get(FamilyMember.CHILD).isPhased()){
ArrayList<Allele> childAlleles = new ArrayList<Allele>(trioPhasedGenotypes.get(FamilyMember.CHILD).getAlleles());
childAlleles.add(childAlleles.remove(0));
trioPhasedGenotypes.put(FamilyMember.CHILD,new Genotype(DUMMY_NAME,childAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
trioPhasedGenotypes.put(FamilyMember.CHILD,makeGenotype(childAlleles,true));
}
}
@ -347,7 +354,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
//Add the transmission probability
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
genotypeAttributes.putAll(genotype.getAttributes());
genotypeAttributes.putAll(genotype.getExtendedAttributes());
if(transmissionProb>NO_TRANSMISSION_PROB)
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
@ -370,7 +377,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
else
log10Error = genotype.getLikelihoods().getLog10GQ(phasedGenotype.getType());
return new Genotype(genotype.getSampleName(), phasedAlleles, log10Error, null, genotypeAttributes, phasedGenotype.isPhased());
return new GenotypeBuilder(genotype).alleles(phasedAlleles)
.GQ(log10Error)
.attributes(genotypeAttributes)
.phased(phasedGenotype.isPhased()).make();
}
@ -438,15 +448,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
//Create the transmission matrices
private void buildMatrices(){
mvCountMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
transmissionMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>>(Genotype.Type.class);
for(Genotype.Type mother : Genotype.Type.values()){
mvCountMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
transmissionMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>(Genotype.Type.class));
for(Genotype.Type father : Genotype.Type.values()){
mvCountMatrix.get(mother).put(father,new EnumMap<Genotype.Type, Integer>(Genotype.Type.class));
transmissionMatrix.get(mother).put(father,new EnumMap<Genotype.Type,TrioPhase>(Genotype.Type.class));
for(Genotype.Type child : Genotype.Type.values()){
mvCountMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
transmissionMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>>(GenotypeType.class);
for(GenotypeType mother : GenotypeType.values()){
mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
transmissionMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>(GenotypeType.class));
for(GenotypeType father : GenotypeType.values()){
mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class));
transmissionMatrix.get(mother).put(father,new EnumMap<GenotypeType,TrioPhase>(GenotypeType.class));
for(GenotypeType child : GenotypeType.values()){
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
transmissionMatrix.get(mother).get(father).put(child,new TrioPhase(mother,father,child));
}
@ -457,16 +467,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
//Returns the number of Mendelian Violations for a given genotype combination.
//If one of the parents genotype is missing, it will consider it as a parent/child pair
//If the child genotype or both parents genotypes are missing, 0 is returned.
private int getCombinationMVCount(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
private int getCombinationMVCount(GenotypeType mother, GenotypeType father, GenotypeType child){
//Child is no call => No MV
if(child == Genotype.Type.NO_CALL || child == Genotype.Type.UNAVAILABLE)
if(child == GenotypeType.NO_CALL || child == GenotypeType.UNAVAILABLE)
return 0;
//Add parents with genotypes for the evaluation
ArrayList<Genotype.Type> parents = new ArrayList<Genotype.Type>();
if (!(mother == Genotype.Type.NO_CALL || mother == Genotype.Type.UNAVAILABLE))
ArrayList<GenotypeType> parents = new ArrayList<GenotypeType>();
if (!(mother == GenotypeType.NO_CALL || mother == GenotypeType.UNAVAILABLE))
parents.add(mother);
if (!(father == Genotype.Type.NO_CALL || father == Genotype.Type.UNAVAILABLE))
if (!(father == GenotypeType.NO_CALL || father == GenotypeType.UNAVAILABLE))
parents.add(father);
//Both parents no calls => No MV
@ -477,35 +487,35 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
int parentsNumRefAlleles = 0;
int parentsNumAltAlleles = 0;
for(Genotype.Type parent : parents){
if(parent == Genotype.Type.HOM_REF){
for(GenotypeType parent : parents){
if(parent == GenotypeType.HOM_REF){
parentsNumRefAlleles++;
}
else if(parent == Genotype.Type.HET){
else if(parent == GenotypeType.HET){
parentsNumRefAlleles++;
parentsNumAltAlleles++;
}
else if(parent == Genotype.Type.HOM_VAR){
else if(parent == GenotypeType.HOM_VAR){
parentsNumAltAlleles++;
}
}
//Case Child is HomRef
if(child == Genotype.Type.HOM_REF){
if(child == GenotypeType.HOM_REF){
if(parentsNumRefAlleles == parents.size())
return 0;
else return (parents.size()-parentsNumRefAlleles);
}
//Case child is HomVar
if(child == Genotype.Type.HOM_VAR){
if(child == GenotypeType.HOM_VAR){
if(parentsNumAltAlleles == parents.size())
return 0;
else return parents.size()-parentsNumAltAlleles;
}
//Case child is Het
if(child == Genotype.Type.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
if(child == GenotypeType.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
return 0;
//MV
@ -513,7 +523,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
}
//Given two trio genotypes combinations, returns the number of different genotypes between the two combinations.
private int countFamilyGenotypeDiff(Genotype.Type motherOriginal,Genotype.Type fatherOriginal,Genotype.Type childOriginal,Genotype.Type motherNew,Genotype.Type fatherNew,Genotype.Type childNew){
private int countFamilyGenotypeDiff(GenotypeType motherOriginal,GenotypeType fatherOriginal,GenotypeType childOriginal,GenotypeType motherNew,GenotypeType fatherNew,GenotypeType childNew){
int count = 0;
if(motherOriginal!=motherNew)
count++;
@ -526,21 +536,21 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
//Get a Map of genotype likelihoods.
//In case of null, unavailable or no call, all likelihoods are 1/3.
private EnumMap<Genotype.Type,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
if(genotype == null || !genotype.isCalled()){
EnumMap<Genotype.Type,Double> likelihoods = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
likelihoods.put(Genotype.Type.HOM_REF,1.0/3.0);
likelihoods.put(Genotype.Type.HET,1.0/3.0);
likelihoods.put(Genotype.Type.HOM_VAR,1.0/3.0);
EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class);
likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
likelihoods.put(GenotypeType.HET,1.0/3.0);
likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0);
return likelihoods;
}
return genotype.getLikelihoods().getAsMap(true);
}
//Returns the Genotype.Type; returns UNVAILABLE if given null
private Genotype.Type getTypeSafeNull(Genotype genotype){
//Returns the GenotypeType; returns UNVAILABLE if given null
private GenotypeType getTypeSafeNull(Genotype genotype){
if(genotype == null)
return Genotype.Type.UNAVAILABLE;
return GenotypeType.UNAVAILABLE;
return genotype.getType();
}
@ -561,18 +571,18 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
//Always assign the first parent as the parent having genotype information in pairs
//Always assign the mother as the first parent in trios
int parentsCalled = 0;
Map<Genotype.Type,Double> firstParentLikelihoods;
Map<Genotype.Type,Double> secondParentLikelihoods;
ArrayList<Genotype.Type> bestFirstParentGenotype = new ArrayList<Genotype.Type>();
ArrayList<Genotype.Type> bestSecondParentGenotype = new ArrayList<Genotype.Type>();
ArrayList<Genotype.Type> bestChildGenotype = new ArrayList<Genotype.Type>();
Genotype.Type pairSecondParentGenotype = null;
Map<GenotypeType,Double> firstParentLikelihoods;
Map<GenotypeType,Double> secondParentLikelihoods;
ArrayList<GenotypeType> bestFirstParentGenotype = new ArrayList<GenotypeType>();
ArrayList<GenotypeType> bestSecondParentGenotype = new ArrayList<GenotypeType>();
ArrayList<GenotypeType> bestChildGenotype = new ArrayList<GenotypeType>();
GenotypeType pairSecondParentGenotype = null;
if(mother == null || !mother.isCalled()){
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
bestFirstParentGenotype.add(getTypeSafeNull(father));
bestSecondParentGenotype.add(getTypeSafeNull(mother));
pairSecondParentGenotype = mother == null ? Genotype.Type.UNAVAILABLE : mother.getType();
pairSecondParentGenotype = mother == null ? GenotypeType.UNAVAILABLE : mother.getType();
if(father != null && father.isCalled())
parentsCalled = 1;
}
@ -583,12 +593,12 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
bestSecondParentGenotype.add(getTypeSafeNull(father));
if(father == null || !father.isCalled()){
parentsCalled = 1;
pairSecondParentGenotype = father == null ? Genotype.Type.UNAVAILABLE : father.getType();
pairSecondParentGenotype = father == null ? GenotypeType.UNAVAILABLE : father.getType();
}else{
parentsCalled = 2;
}
}
Map<Genotype.Type,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
Map<GenotypeType,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
bestChildGenotype.add(getTypeSafeNull(child));
//Prior vars
@ -604,9 +614,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
int mvCount;
int cumulativeMVCount = 0;
double configurationLikelihood = 0;
for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){
for(Map.Entry<Genotype.Type,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
for(Map.Entry<Genotype.Type,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
for(Map.Entry<GenotypeType,Double> childGenotype : childLikelihoods.entrySet()){
for(Map.Entry<GenotypeType,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
for(Map.Entry<GenotypeType,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
//For parent/child pairs, sum over the possible genotype configurations of the missing parent
if(parentsCalled<2){

View File

@ -109,14 +109,13 @@ class PhasingUtils {
}
double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
if (phaseQual.PQ != null)
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).GQ(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make();
mergedGenotypes.add(mergedGt);
}

View File

@ -288,7 +288,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
// for ( String sample : samplesToPhase )
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
// logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
@ -374,7 +374,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
if (isUnfilteredCalledDiploidGenotype(gt)) {
if (gt.isHom()) { // Note that this Genotype may be replaced later to contain the PQ of a downstream het site that was phased relative to a het site lying upstream of this hom site:
// true <-> can trivially phase a hom site relative to ANY previous site:
Genotype phasedGt = new Genotype(gt.getSampleName(), gt.getAlleles(), gt.getLog10PError(), gt.getFilters(), gt.getAttributes(), true);
Genotype phasedGt = new GenotypeBuilder(gt).phased(true).make();
uvc.setGenotype(samp, phasedGt);
}
else if (gt.isHet()) { // Attempt to phase this het genotype relative to the previous het genotype
@ -408,9 +408,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
if (DEBUG) logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
gtAttribs.put(PQ_KEY, pr.phaseQuality);
Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased);
Genotype phasedGt = new GenotypeBuilder(gt)
.alleles(allelePair.getAllelesAsList())
.attribute(PQ_KEY, pr.phaseQuality)
.phased(genotypesArePhased).make();
uvc.setGenotype(samp, phasedGt);
}
@ -428,9 +429,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
interiorUvc.setPhasingInconsistent();
if (genotypesArePhased) {
Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes());
handledGtAttribs.put(PQ_KEY, pr.phaseQuality);
Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased);
Genotype phasedHomGt = new GenotypeBuilder(handledGt)
.attribute(PQ_KEY, pr.phaseQuality)
.phased(genotypesArePhased).make();
interiorUvc.setGenotype(samp, phasedHomGt);
}
}
@ -1439,7 +1440,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
}
public static boolean isUnfilteredCalledDiploidGenotype(Genotype gt) {
return (gt.isNotFiltered() && gt.isCalled() && gt.getPloidy() == 2);
return (! gt.isFiltered() && gt.isCalled() && gt.getPloidy() == 2);
}
private class MultipleBaseCountsWriter {

View File

@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
return true;
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
// first subset to the samples
VariantContext subContext = vc.subContextFromSamples(samples);
VariantContext subContext = vc.subContextFromSamples(samples, true);
// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?

View File

@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
if ( samples == null || samples.isEmpty() )
return true;
VariantContext subContext = vc.subContextFromSamples(samples, vc.getAlleles());
VariantContext subContext = vc.subContextFromSamples(samples, false);
if ( subContext.isPolymorphicInSamples() ) {
return true;
}

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
@ -54,7 +55,7 @@ public class GenotypeConcordance extends VariantEvaluator {
* Initialize this object
*/
public GenotypeConcordance() {
final int nGenotypeTypes = Genotype.Type.values().length;
final int nGenotypeTypes = GenotypeType.values().length;
truthByCalledGenotypeCounts = new long[nGenotypeTypes][nGenotypeTypes];
}
@ -75,11 +76,11 @@ public class GenotypeConcordance extends VariantEvaluator {
if (eval != null) {
for (final Genotype g : eval.getGenotypes() ) {
final String sample = g.getSampleName();
final Genotype.Type called = g.getType();
final Genotype.Type truth;
final GenotypeType called = g.getType();
final GenotypeType truth;
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
truth = Genotype.Type.NO_CALL;
truth = GenotypeType.NO_CALL;
} else {
truth = validation.getGenotype(sample).getType();
}
@ -90,19 +91,19 @@ public class GenotypeConcordance extends VariantEvaluator {
// otherwise, mark no-calls for all samples
else {
final Genotype.Type called = Genotype.Type.NO_CALL;
final GenotypeType called = GenotypeType.NO_CALL;
for (final Genotype g : validation.getGenotypes()) {
final Genotype.Type truth = g.getType();
final GenotypeType truth = g.getType();
incrValue(truth, called);
// print out interesting sites
/*
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) {
if ( (truth == GenotypeType.HOM_VAR || truth == GenotypeType.HET) && called == GenotypeType.NO_CALL ) {
super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation);
}
if ( (called == Genotype.Type.HOM_VAR || called == Genotype.Type.HET) && truth == Genotype.Type.HOM_REF ) {
if ( (called == GenotypeType.HOM_VAR || called == GenotypeType.HET) && truth == GenotypeType.HOM_REF ) {
super.getVEWalker().gcLog.printf("%s FP %s%n", group, validation);
}
}
@ -121,36 +122,36 @@ public class GenotypeConcordance extends VariantEvaluator {
* @param truth the truth type
* @param called the called type
*/
private void incrValue(final Genotype.Type truth, final Genotype.Type called) {
private void incrValue(final GenotypeType truth, final GenotypeType called) {
truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]++;
}
private long count(final Genotype.Type truth, final Genotype.Type called) {
private long count(final GenotypeType truth, final GenotypeType called) {
return truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()];
}
private long count(final EnumSet<Genotype.Type> truth, final Genotype.Type called) {
private long count(final EnumSet<GenotypeType> truth, final GenotypeType called) {
return count(truth, EnumSet.of(called));
}
private long count(final Genotype.Type truth, final EnumSet<Genotype.Type> called) {
private long count(final GenotypeType truth, final EnumSet<GenotypeType> called) {
return count(EnumSet.of(truth), called);
}
private long count(final EnumSet<Genotype.Type> truth, final EnumSet<Genotype.Type> called) {
private long count(final EnumSet<GenotypeType> truth, final EnumSet<GenotypeType> called) {
long sum = 0;
for ( final Genotype.Type truth1 : truth ) {
for ( final Genotype.Type called1 : called ) {
for ( final GenotypeType truth1 : truth ) {
for ( final GenotypeType called1 : called ) {
sum += count(truth1, called1);
}
}
return sum;
}
private long countDiag( final EnumSet<Genotype.Type> d1 ) {
private long countDiag( final EnumSet<GenotypeType> d1 ) {
long sum = 0;
for(final Genotype.Type e1 : d1 ) {
for(final GenotypeType e1 : d1 ) {
sum += truthByCalledGenotypeCounts[e1.ordinal()][e1.ordinal()];
}
@ -159,13 +160,13 @@ public class GenotypeConcordance extends VariantEvaluator {
@Override
public void finalizeEvaluation() {
final EnumSet<Genotype.Type> allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET);
final EnumSet<Genotype.Type> allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF);
final EnumSet<Genotype.Type> allGenotypes = EnumSet.allOf(Genotype.Type.class);
final EnumSet<GenotypeType> allVariantGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET);
final EnumSet<GenotypeType> allCalledGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET, GenotypeType.HOM_REF);
final EnumSet<GenotypeType> allGenotypes = EnumSet.allOf(GenotypeType.class);
// exact values of the table
for ( final Genotype.Type truth : Genotype.Type.values() ) {
for ( final Genotype.Type called : Genotype.Type.values() ) {
for ( final GenotypeType truth : GenotypeType.values() ) {
for ( final GenotypeType called : GenotypeType.values() ) {
final String field = String.format("n_true_%s_called_%s", truth, called);
final Long value = count(truth, called);
map.put(field, value.toString());
@ -173,20 +174,20 @@ public class GenotypeConcordance extends VariantEvaluator {
}
// counts of called genotypes
for ( final Genotype.Type called : Genotype.Type.values() ) {
for ( final GenotypeType called : GenotypeType.values() ) {
final String field = String.format("total_called_%s", called);
final Long value = count(allGenotypes, called);
map.put(field, value.toString());
}
// counts of true genotypes
for ( final Genotype.Type truth : Genotype.Type.values() ) {
for ( final GenotypeType truth : GenotypeType.values() ) {
final String field = String.format("total_true_%s", truth);
final Long value = count(truth, allGenotypes);
map.put(field, value.toString());
}
for ( final Genotype.Type genotype : Genotype.Type.values() ) {
for ( final GenotypeType genotype : GenotypeType.values() ) {
final String field = String.format("percent_%s_called_%s", genotype, genotype);
long numer = count(genotype, genotype);
long denom = count(EnumSet.of(genotype), allGenotypes);
@ -215,7 +216,7 @@ public class GenotypeConcordance extends VariantEvaluator {
// overall genotype concordance of sites called non-ref in eval track
// MAD: this is the non-reference discrepancy rate
final String field = "percent_non_reference_discrepancy_rate";
long homrefConcords = count(Genotype.Type.HOM_REF, Genotype.Type.HOM_REF);
long homrefConcords = count(GenotypeType.HOM_REF, GenotypeType.HOM_REF);
long allNoHomRef = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
long numer = allNoHomRef - countDiag(allVariantGenotypes);
long denom = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;

View File

@ -199,7 +199,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles());
VariantContext vcsub = vc.subContextFromSamples(sampleNames, false);
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();

View File

@ -223,7 +223,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
newA = Allele.NO_CALL;
newAlleles.add(newA);
}
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
}
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();

View File

@ -538,7 +538,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
if (!selectedTypes.contains(vc.getType()))
continue;
VariantContext sub = subsetRecord(vc, samples, EXCLUDE_NON_VARIANTS);
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull());
@ -687,18 +687,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
*
* @param vc the VariantContext record to subset
* @param samples the samples to extract
* @return the subsetted VariantContext
*/
private VariantContext subsetRecord(final VariantContext vc, final Set<String> samples, final boolean excludeNonVariants) {
if ( samples == null || samples.isEmpty() )
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
if ( NO_SAMPLES_SPECIFIED || samples.isEmpty() )
return vc;
final VariantContext sub;
if ( excludeNonVariants )
sub = vc.subContextFromSamples(samples); // strip out the alternate alleles that aren't being used
else
sub = vc.subContextFromSamples(samples, vc.getAlleles());
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used
VariantContextBuilder builder = new VariantContextBuilder(sub);
GenotypesContext newGC = sub.getGenotypes();
@ -708,15 +704,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
//Remove a fraction of the genotypes if needed
if(fractionGenotypes>0){
if ( fractionGenotypes > 0 ){
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(Allele.create((byte)'.'));
alleles.add(Allele.create((byte)'.'));
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).GQ(-1).make());
}
else{
genotypes.add(genotype);
@ -750,14 +744,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
for (String sample : originalVC.getSampleNames()) {
Genotype g = originalVC.getGenotype(sample);
if ( g.isNotFiltered() ) {
String dp = (String) g.getAttribute("DP");
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
depth += Integer.valueOf(dp);
}
if ( ! g.isFiltered() ) {
if ( g.hasDP() )
depth += g.getDP();
}
}
builder.attribute("DP", depth);
}

View File

@ -132,7 +132,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
// set the appropriate sample name if necessary
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
builder.genotypes(g);
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
@ -30,7 +31,7 @@ public class MendelianViolation {
private boolean allCalledOnly = true;
//Stores occurrences of inheritance
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
private EnumMap<GenotypeType, EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> inheritance;
private int violations_total=0;
@ -74,119 +75,119 @@ public class MendelianViolation {
//Count of HomRef/HomRef/HomRef trios
public int getRefRefRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
}
//Count of HomVar/HomVar/HomVar trios
public int getVarVarVar(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
}
//Count of HomRef/HomVar/Het trios
public int getRefVarHet(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HET) +
inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
}
//Count of Het/Het/Het trios
public int getHetHetHet(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET);
}
//Count of Het/Het/HomRef trios
public int getHetHetHomRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
}
//Count of Het/Het/HomVar trios
public int getHetHetHomVar(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
}
//Count of ref alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
//return parentsHetHet_childRef;
}
//Count of var alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedVar(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
//return parentsHetHet_childVar;
}
//Count of ref alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_REF)
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
//return parentsHomRefHet_childRef;
}
//Count of var alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HET)
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
//return parentsHomRefHet_childVar;
}
//Count of ref alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedRef(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HET)
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
//return parentsHomVarHet_childRef;
}
//Count of var alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedVar(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
//return parentsHomVarHet_childVar;
}
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
public int getParentsRefRefChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_REF -> HET
public int getParentsRefRefChildHet(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
}
//Count of violations of the type HOM_REF/HET -> HOM_VAR
public int getParentsRefHetChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
public int getParentsRefVarChildVar(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR)
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
}
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
public int getParentsRefVarChildRef(){
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
}
//Count of violations of the type HOM_VAR/HET -> HOM_REF
public int getParentsVarHetChildRef(){
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
return inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
}
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
public int getParentsVarVarChildRef(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF);
}
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
public int getParentsVarVarChildHet(){
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
}
@ -362,12 +363,12 @@ public class MendelianViolation {
private void createInheritanceMap(){
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
for(Genotype.Type mType : Genotype.Type.values()){
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
for(Genotype.Type dType : Genotype.Type.values()){
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
for(Genotype.Type cType : Genotype.Type.values()){
inheritance = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
for(GenotypeType mType : GenotypeType.values()){
inheritance.put(mType, new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
for(GenotypeType dType : GenotypeType.values()){
inheritance.get(mType).put(dType, new EnumMap<GenotypeType,Integer>(GenotypeType.class));
for(GenotypeType cType : GenotypeType.values()){
inheritance.get(mType).get(dType).put(cType, 0);
}
}
@ -376,9 +377,9 @@ public class MendelianViolation {
}
private void clearInheritanceMap(){
for(Genotype.Type mType : Genotype.Type.values()){
for(Genotype.Type dType : Genotype.Type.values()){
for(Genotype.Type cType : Genotype.Type.values()){
for(GenotypeType mType : GenotypeType.values()){
for(GenotypeType dType : GenotypeType.values()){
for(GenotypeType cType : GenotypeType.values()){
inheritance.get(mType).get(dType).put(cType, 0);
}
}

View File

@ -223,6 +223,19 @@ public class Utils {
return ret.toString();
}
public static String join(String separator, int[] ints) {
if ( ints == null || ints.length == 0)
return "";
else {
StringBuilder ret = new StringBuilder(ints[0]);
for (int i = 1; i < ints.length; ++i) {
ret.append(separator);
ret.append(ints[i]);
}
return ret.toString();
}
}
/**
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
* elti objects (note there's no actual space between sep and the elti elements). Returns

View File

@ -314,7 +314,9 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
final LazyGenotypesContext.LazyParser lazyParser =
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields);
final int nGenotypes = header.getGenotypeSamples().size();
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, decoder.getRecordBytes(), nGenotypes);
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser,
new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()),
nGenotypes);
// did we resort the sample names? If so, we need to load the genotype data
if ( !header.samplesWereAlreadySorted() )
@ -324,6 +326,16 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
}
}
public static class LazyData {
final public int nGenotypeFields;
final public byte[] bytes;
public LazyData(final int nGenotypeFields, final byte[] bytes) {
this.nGenotypeFields = nGenotypeFields;
this.bytes = bytes;
}
}
private final String getDictionaryString() {
return getDictionaryString((Integer) decoder.decodeTypedValue());
}

View File

@ -58,6 +58,23 @@ public class BCF2Encoder {
return bytes;
}
/**
* Method for writing raw bytes to the encoder stream
*
* The purpuse this method exists is to allow lazy decoding of genotype data. In that
* situation the reader has loaded a block of bytes, and never decoded it, so we
* are just writing it back out immediately as a raw stream of blocks. Any
* bad low-level formatting or changes to that byte[] will result in a malformed
* BCF2 block.
*
* @param bytes a non-null byte array
* @throws IOException
*/
public void writeRawBytes(final byte[] bytes) throws IOException {
assert bytes != null;
encodeStream.write(bytes);
}
// --------------------------------------------------------------------------------
//
// Writing typed values (have type byte)

View File

@ -56,79 +56,8 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
this.nFields = nFields;
}
@Override
public LazyGenotypesContext.LazyData parse(final Object data) {
logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
// load our bytep[] data into the decoder
final BCF2Decoder decoder = new BCF2Decoder((byte[])data);
// go ahead and decode everyone
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
if ( samples.size() != nSamples )
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
"different numbers of samples per record. Saw " + samples.size() +
" samples in header but have a record with " + nSamples + " samples");
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(decoder, nFields, nSamples);
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
for ( int i = 0; i < nSamples; i++ ) {
// all of the information we need for each genotype, with default values
final String sampleName = samples.get(i);
List<Allele> alleles = null;
boolean isPhased = false;
double log10PError = VariantContext.NO_LOG10_PERROR;
Set<String> filters = null;
Map<String, Object> attributes = null;
double[] log10Likelihoods = null;
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
final String field = entry.getKey();
Object value = entry.getValue().get(i);
try {
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
alleles = decodeGenotypeAlleles(siteAlleles, (List<Integer>)value);
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
if ( value != BCF2Type.INT8.getMissingJavaValue() )
log10PError = ((Integer)value) / -10.0;
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
final List<Integer> pls = (List<Integer>)value;
if ( pls != null ) { // we have a PL field
log10Likelihoods = new double[pls.size()];
for ( int j = 0; j < log10Likelihoods.length; j++ ) {
final double d = pls.get(j);
log10Likelihoods[j] = d == -0.0 ? 0.0 : d / -10.0;
}
}
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
//filters = new HashSet<String>(values.get(i));
} else { // add to attributes
if ( value != null ) { // don't add missing values
if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
if ( value instanceof List && ((List)value).size() == 1)
value = ((List)value).get(0);
attributes.put(field, value);
}
}
} catch ( ClassCastException e ) {
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
+ " inconsistent with the value observed in the decoded value in the "
+ " BCF file. Value was " + value);
}
}
if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
genotypes.add(g);
}
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
}
// public LazyGenotypesContext.LazyData parse2(final Object data) {
// @Override
// public LazyGenotypesContext.LazyData parse(final Object data) {
// logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
//
// // load our bytep[] data into the decoder
@ -144,40 +73,43 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
//
// final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(decoder, nFields, nSamples);
// final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
// final GenotypeBuilder gb = new GenotypeBuilder();
// for ( int i = 0; i < nSamples; i++ ) {
// // all of the information we need for each genotype, with default values
// gb.reset();
// gb.name(samples.get(i));
// final String sampleName = samples.get(i);
// List<Allele> alleles = null;
// boolean isPhased = false;
// double log10PError = VariantContext.NO_LOG10_PERROR;
// Set<String> filters = null;
// Map<String, Object> attributes = null;
// double[] log10Likelihoods = null;
//
// for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
// final String field = entry.getKey();
// Object value = entry.getValue().get(i);
// try {
// if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
// gb.alleles(decodeGenotypeAlleles(siteAlleles, (List<Integer>)value));
// } else if ( field.equals(VCFConstants.DEPTH_KEY) ) {
// if ( value != BCF2Type.INT8.getMissingJavaValue() )
// gb.DP((Integer)value);
// alleles = decodeGenotypeAlleles(siteAlleles, (List<Integer>)value);
// } else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
// if ( value != BCF2Type.INT8.getMissingJavaValue() )
// gb.GQ((Integer)value);
// log10PError = ((Integer)value) / -10.0;
// } else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
// final int[] PLs = decodeIntArray(value);
// if ( PLs != null )
// gb.PL(PLs);
// } else if ( field.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS) ) {
// final int[] AD = decodeIntArray(value);
// if ( AD != null )
// gb.AD(AD);
// final List<Integer> pls = (List<Integer>)value;
// if ( pls != null ) { // we have a PL field
// log10Likelihoods = new double[pls.size()];
// for ( int j = 0; j < log10Likelihoods.length; j++ ) {
// final double d = pls.get(j);
// log10Likelihoods[j] = d == -0.0 ? 0.0 : d / -10.0;
// }
// }
// } else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
// throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
// //filters = new HashSet<String>(values.get(i));
// } else { // add to attributes
// if ( value != null ) { // don't add missing values
// if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
// if ( value instanceof List && ((List)value).size() == 1)
// value = ((List)value).get(0);
// gb.attribute(field, value);
// attributes.put(field, value);
// }
// }
// } catch ( ClassCastException e ) {
@ -187,13 +119,82 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
// }
// }
//
// final Genotype g = gb.make();
// if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
//
// final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
// genotypes.add(g);
// }
//
// return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
// }
@Override
public LazyGenotypesContext.LazyData parse(final Object data) {
logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
// load our bytep[] data into the decoder
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
// go ahead and decode everyone
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
if ( samples.size() != nSamples )
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
"different numbers of samples per record. Saw " + samples.size() +
" samples in header but have a record with " + nSamples + " samples");
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(decoder, nFields, nSamples);
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
final GenotypeBuilder gb = new GenotypeBuilder();
for ( int i = 0; i < nSamples; i++ ) {
// all of the information we need for each genotype, with default values
gb.reset();
gb.name(samples.get(i));
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
final String field = entry.getKey();
Object value = entry.getValue().get(i);
try {
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
gb.alleles(decodeGenotypeAlleles(siteAlleles, (List<Integer>)value));
} else if ( field.equals(VCFConstants.DEPTH_KEY) ) {
if ( value != BCF2Type.INT8.getMissingJavaValue() )
gb.DP((Integer)value);
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
if ( value != BCF2Type.INT8.getMissingJavaValue() )
gb.GQ((Integer)value);
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
final int[] PLs = decodeIntArray(value);
if ( PLs != null )
gb.PL(PLs);
} else if ( field.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS) ) {
final int[] AD = decodeIntArray(value);
if ( AD != null )
gb.AD(AD);
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
//filters = new HashSet<String>(values.get(i));
} else { // add to attributes
if ( value != null ) { // don't add missing values
if ( value instanceof List && ((List)value).size() == 1)
value = ((List)value).get(0);
gb.attribute(field, value);
}
}
} catch ( ClassCastException e ) {
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
+ " inconsistent with the value observed in the decoded value in the "
+ " BCF file. Value was " + value);
}
}
final Genotype g = gb.make();
genotypes.add(g);
}
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
}
private final int[] decodeIntArray(final Object value) {
// todo -- decode directly into int[]
final List<Integer> pls = (List<Integer>)value;
@ -242,10 +243,15 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
for ( int i = 0; i < nFields; i++ ) {
final int offset = (Integer) decoder.decodeTypedValue();
final String field = codec.getDictionaryString(offset);
// the type of each element
final byte typeDescriptor = decoder.readTypeDescriptor();
final List<Object> values = new ArrayList<Object>(nSamples);
for ( int j = 0; j < nSamples; j++ )
values.add(decoder.decodeTypedValue(typeDescriptor));
assert ! map.containsKey(field);
map.put(field, values);
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.codecs.bcf2;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
@ -33,9 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.*;
/**
* Common utilities for working with BCF2 files
@ -77,11 +76,16 @@ public class BCF2Utils {
* The dictionary is an ordered list of common VCF identifers (FILTER, INFO, and FORMAT)
* fields.
*
* Note that its critical that the list be dedupped and sorted in a consistent manner each time,
* as the BCF2 offsets are encoded relative to this dictionary, and if it isn't determined exactly
* the same way as in the header each time it's very bad
*
* @param header the VCFHeader from which to build the dictionary
* @return a non-null dictionary of elements, may be empty
*/
@Ensures("new HashSet(result).size() == result.size()")
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
final ArrayList<String> dict = new ArrayList<String>();
final Set<String> dict = new TreeSet<String>();
// set up the strings dictionary
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
@ -92,7 +96,7 @@ public class BCF2Utils {
}
}
return dict;
return new ArrayList<String>(dict);
}
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {

View File

@ -48,7 +48,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
protected final String[] locParts = new String[6];
// for performance we cache the hashmap of filter encodings for quick lookup
protected HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>();
protected HashMap<String,List<String>> filterHash = new HashMap<String,List<String>>();
// we store a name to give to each of the variant contexts we emit
protected String name = "Unknown";
@ -108,7 +108,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
* @param filterString the string to parse
* @return a set of the filters applied
*/
protected abstract Set<String> parseFilters(String filterString);
protected abstract List<String> parseFilters(String filterString);
/**
* create a VCF header from a set of header record lines
@ -320,7 +320,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
String ref = getCachedString(parts[3].toUpperCase());
String alts = getCachedString(parts[4].toUpperCase());
builder.log10PError(parseQual(parts[5]));
builder.filters(parseFilters(getCachedString(parts[6])));
final List<String> filters = parseFilters(getCachedString(parts[6]));
if ( filters != null ) builder.filters(new HashSet<String>(filters));
final Map<String, Object> attrs = parseInfo(parts[7]);
builder.attributes(attrs);

View File

@ -78,24 +78,24 @@ public class VCF3Codec extends AbstractVCFCodec {
* @param filterString the string to parse
* @return a set of the filters applied
*/
protected Set<String> parseFilters(String filterString) {
protected List<String> parseFilters(String filterString) {
// null for unfiltered
if ( filterString.equals(VCFConstants.UNFILTERED) )
return null;
// empty set for passes filters
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
List<String> fFields = new ArrayList<String>();
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
return fFields;
return new ArrayList<String>(fFields);
if ( filterString.length() == 0 )
generateException("The VCF specification requires a valid filter status");
// do we have the filter string cached?
if ( filterHash.containsKey(filterString) )
return filterHash.get(filterString);
return new ArrayList<String>(filterHash.get(filterString));
// otherwise we have to parse and cache the value
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
@ -141,7 +141,7 @@ public class VCF3Codec extends AbstractVCFCodec {
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
double GTQual = VariantContext.NO_LOG10_PERROR;
Set<String> genotypeFilters = null;
List<String> genotypeFilters = null;
Map<String, Object> gtAttributes = null;
String sampleName = sampleNameIterator.next();
@ -181,12 +181,10 @@ public class VCF3Codec extends AbstractVCFCodec {
// add it to the list
try {
genotypes.add(new Genotype(sampleName,
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
GTQual,
genotypeFilters,
gtAttributes,
phased));
final Genotype g = new GenotypeBuilder(sampleName)
.alleles(parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap))
.GQ(GTQual).filters(genotypeFilters).attributes(gtAttributes).phased(phased).make();
genotypes.add(g);
} catch (TribbleException e) {
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
}

View File

@ -127,38 +127,33 @@ public class VCFCodec extends AbstractVCFCodec {
* @param filterString the string to parse
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
*/
protected Set<String> parseFilters(String filterString) {
return parseFilters(filterHash, lineNo, filterString);
}
public static Set<String> parseFilters(final Map<String, LinkedHashSet<String>> cache, final int lineNo, final String filterString) {
protected List<String> parseFilters(String filterString) {
// null for unfiltered
if ( filterString.equals(VCFConstants.UNFILTERED) )
return null;
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
return Collections.emptySet();
return Collections.emptyList();
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
if ( filterString.length() == 0 )
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
// do we have the filter string cached?
if ( cache != null && cache.containsKey(filterString) )
return Collections.unmodifiableSet(cache.get(filterString));
if ( filterHash.containsKey(filterString) )
return filterHash.get(filterString);
// empty set for passes filters
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
List<String> fFields = new LinkedList<String>();
// otherwise we have to parse and cache the value
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
fFields.add(filterString);
else
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
fFields = fFields;
if ( cache != null ) cache.put(filterString, fFields);
filterHash.put(filterString, Collections.unmodifiableList(fFields));
return Collections.unmodifiableSet(fFields);
return fFields;
}
@ -192,10 +187,8 @@ public class VCFCodec extends AbstractVCFCodec {
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
double GTQual = VariantContext.NO_LOG10_PERROR;
Set<String> genotypeFilters = null;
Map<String, Object> gtAttributes = null;
String sampleName = sampleNameIterator.next();
final String sampleName = sampleNameIterator.next();
final GenotypeBuilder gb = new GenotypeBuilder(sampleName);
// check to see if the value list is longer than the key list, which is a problem
if (nGTKeys < GTValueSplitSize)
@ -203,23 +196,34 @@ public class VCFCodec extends AbstractVCFCodec {
int genotypeAlleleLocation = -1;
if (nGTKeys >= 1) {
gtAttributes = new HashMap<String, Object>(nGTKeys - 1);
gb.maxAttributes(nGTKeys - 1);
for (int i = 0; i < nGTKeys; i++) {
final String gtKey = new String(genotypeKeyArray[i]);
final String gtKey = genotypeKeyArray[i];
boolean missing = i >= GTValueSplitSize;
// todo -- all of these on the fly parsing of the missing value should be static constants
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
genotypeAlleleLocation = i;
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
genotypeFilters = missing ? parseFilters(VCFConstants.MISSING_VALUE_v4) : parseFilters(getCachedString(GTValueArray[i]));
final List<String> filters = parseFilters(getCachedString(GTValueArray[i]));
if ( filters != null ) gb.filters(filters);
} else if ( missing ) {
// if its truly missing (there no provided value) skip adding it to the attributes
} else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) {
// don't add missing values to the map
} else {
gtAttributes.put(gtKey, GTValueArray[i]);
if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
gb.GQ(Double.valueOf(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
gb.AD(decodeInts(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
gb.PL(decodeInts(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.DEPTH_KEY)) {
gb.DP(Integer.valueOf(GTValueArray[i]));
} else {
gb.attribute(gtKey, GTValueArray[i]);
}
}
}
}
@ -230,12 +234,13 @@ public class VCFCodec extends AbstractVCFCodec {
if ( genotypeAlleleLocation > 0 )
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
final List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
gb.alleles(GTalleles);
gb.phased(genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1);
// add it to the list
try {
genotypes.add(new Genotype(sampleName, GTalleles, GTQual, genotypeFilters, gtAttributes, phased));
genotypes.add(gb.make());
} catch (TribbleException e) {
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
}
@ -244,6 +249,14 @@ public class VCFCodec extends AbstractVCFCodec {
return new LazyGenotypesContext.LazyData(genotypes, header.getSampleNamesInOrder(), header.getSampleNameToOffset());
}
private final static int[] decodeInts(final String string) {
final String[] splits = string.split(",");
final int[] values = new int[splits.length];
for ( int i = 0; i < splits.length; i++ )
values[i] = Integer.valueOf(splits[i]);
return values;
}
@Override
public boolean canDecode(final String potentialInput) {
return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER);

View File

@ -77,7 +77,7 @@ import java.util.*;
* @author Mark DePristo
* @since 05/12
*/
public final class FastGenotype implements Comparable<FastGenotype> {
public final class FastGenotype implements Genotype {
private final String sampleName;
private final List<Allele> alleles;
private final boolean isPhased;
@ -87,7 +87,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
private final int[] PL;
private final Map<String, Object> extendedAttributes;
private Type type = null;
private GenotypeType type = null;
/**
* The only way to make one of these, for use by GenotypeBuilder only
@ -168,7 +168,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
@Requires("i >=0 && i < getPloidy()")
@Ensures("result != null")
public Allele getAllele(int i) {
if ( getType() == Type.UNAVAILABLE )
if ( getType() == GenotypeType.UNAVAILABLE )
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
return alleles.get(i);
}
@ -229,26 +229,11 @@ public final class FastGenotype implements Comparable<FastGenotype> {
//
// ---------------------------------------------------------------------------------------------------------
public enum Type {
/** The sample is no-called (all alleles are NO_CALL */
NO_CALL,
/** The sample is homozygous reference */
HOM_REF,
/** The sample is heterozygous, with at least one ref and at least one one alt in any order */
HET,
/** All alleles are non-reference */
HOM_VAR,
/** There is no allele data availble for this sample (alleles.isEmpty) */
UNAVAILABLE,
/** Some chromosomes are NO_CALL and others are called */
MIXED // no-call and call in the same genotype
}
/**
* @return the high-level type of this sample's genotype
*/
@Ensures({"type != null", "result != null"})
public Type getType() {
public GenotypeType getType() {
if ( type == null ) {
type = determineType();
}
@ -259,10 +244,10 @@ public final class FastGenotype implements Comparable<FastGenotype> {
* Internal code to determine the type of the genotype from the alleles vector
* @return the type
*/
protected Type determineType() {
protected GenotypeType determineType() {
// TODO -- this code is slow and could be optimized for the diploid case
if ( alleles.isEmpty() )
return Type.UNAVAILABLE;
return GenotypeType.UNAVAILABLE;
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
@ -278,14 +263,14 @@ public final class FastGenotype implements Comparable<FastGenotype> {
if ( sawNoCall ) {
if ( observedAllele == null )
return Type.NO_CALL;
return Type.MIXED;
return GenotypeType.NO_CALL;
return GenotypeType.MIXED;
}
if ( observedAllele == null )
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
}
/**
@ -296,37 +281,37 @@ public final class FastGenotype implements Comparable<FastGenotype> {
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == Type.HOM_REF; }
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == Type.HET; }
public boolean isHet() { return getType() == GenotypeType.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == Type.NO_CALL; }
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
/**
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
*/
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == Type.MIXED; }
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
// ------------------------------------------------------------------------------
//
@ -450,15 +435,15 @@ public final class FastGenotype implements Comparable<FastGenotype> {
* @return
*/
@Override
public int compareTo(final FastGenotype genotype) {
public int compareTo(final Genotype genotype) {
return getSampleName().compareTo(genotype.getSampleName());
}
public boolean sameGenotype(final FastGenotype other) {
public boolean sameGenotype(final Genotype other) {
return sameGenotype(other, true);
}
public boolean sameGenotype(final FastGenotype other, boolean ignorePhase) {
public boolean sameGenotype(final Genotype other, boolean ignorePhase) {
if (getPloidy() != other.getPloidy())
return false; // gotta have the same number of allele to be equal
@ -515,6 +500,11 @@ public final class FastGenotype implements Comparable<FastGenotype> {
return hasAttribute(key) ? extendedAttributes.get(key) : defaultValue;
}
@Override
public Object getAttribute(final String key) {
return getAttribute(key, null);
}
// TODO -- add getAttributesAsX interface here
// ------------------------------------------------------------------------------
@ -584,7 +574,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
* manage inline in the Genotype object. They must not appear in the
* extended attributes map
*/
private final static Collection<String> FORBIDDEN_KEYS = Arrays.asList(
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
VCFConstants.GENOTYPE_KEY,
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
@ -599,7 +589,7 @@ public final class FastGenotype implements Comparable<FastGenotype> {
* @return
*/
private final static boolean hasForbiddenKey(final Map<String, Object> attributes) {
for ( final String forbidden : FORBIDDEN_KEYS )
for ( final String forbidden : PRIMARY_KEYS)
if ( attributes.containsKey(forbidden) )
return true;
return false;
@ -617,4 +607,44 @@ public final class FastGenotype implements Comparable<FastGenotype> {
return false;
return true;
}
@Override public boolean hasPL() { return PL != null; }
@Override public boolean hasAD() { return AD != null; }
@Override public boolean hasGQ() { return GQ != -1; }
@Override public boolean hasDP() { return DP != -1; }
// TODO -- remove me
@Override public List<String> getFilters() {
return (List<String>)getAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
}
@Override public boolean isFiltered() { return false; }
@Override public boolean filtersWereApplied() { return false; }
@Override public boolean hasLog10PError() { return hasGQ(); }
@Override public double getLog10PError() { return getGQ() / -10.0; }
@Override public int getPhredScaledQual() { return getGQ(); }
@Override
public String getAttributeAsString(String key, String defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof String ) return (String)x;
return String.valueOf(x); // throws an exception if this isn't a string
}
@Override
public int getAttributeAsInt(String key, int defaultValue) {
Object x = getAttribute(key);
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
if ( x instanceof Integer ) return (Integer)x;
return Integer.valueOf((String)x); // throws an exception if this isn't a string
}
@Override
public double getAttributeAsDouble(String key, double defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof Double ) return (Double)x;
return Double.valueOf((String)x); // throws an exception if this isn't a string
}
}

View File

@ -12,366 +12,99 @@ import java.util.*;
*
* @author Mark DePristo
*/
public class Genotype implements Comparable<Genotype> {
public interface Genotype extends Comparable<Genotype> {
List<Allele> getAlleles();
public final static String PHASED_ALLELE_SEPARATOR = "|";
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
int countAllele(final Allele allele);
protected CommonInfo commonInfo;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
protected List<Allele> alleles = null; // new ArrayList<Allele>();
protected Type type = null;
Allele getAllele(int i);
protected boolean isPhased = false;
boolean isPhased();
public Genotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased) {
this(sampleName, alleles, log10PError, filters, attributes, isPhased, null);
}
int getPloidy();
public Genotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
if ( alleles == null || alleles.isEmpty() )
this.alleles = Collections.emptyList();
else
this.alleles = Collections.unmodifiableList(alleles);
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
if ( log10Likelihoods != null )
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
this.isPhased = isPhased;
validate();
}
GenotypeType getType();
/**
* Creates a new Genotype for sampleName with genotype according to alleles.
* @param sampleName
* @param alleles
* @param log10PError the confidence in these alleles
* @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
*/
public Genotype(String sampleName, List<Allele> alleles, double log10PError, double[] log10Likelihoods) {
this(sampleName, alleles, log10PError, null, null, false, log10Likelihoods);
}
boolean isHom();
public Genotype(String sampleName, List<Allele> alleles, double log10PError) {
this(sampleName, alleles, log10PError, null, null, false);
}
boolean isHomRef();
public Genotype(String sampleName, List<Allele> alleles) {
this(sampleName, alleles, NO_LOG10_PERROR, null, null, false);
}
boolean isHomVar();
public Genotype(String sampleName, Genotype parent) {
this(sampleName, parent.getAlleles(), parent.getLog10PError(), parent.getFilters(), parent.getAttributes(), parent.isPhased());
}
boolean isHet();
boolean isNoCall();
boolean isCalled();
// ---------------------------------------------------------------------------------------------------------
//
// Partial-cloning routines (because Genotype is immutable).
//
// ---------------------------------------------------------------------------------------------------------
boolean isMixed();
public static Genotype modifyName(Genotype g, String name) {
return new Genotype(name, g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
}
public static Genotype modifyAttributes(Genotype g, Map<String, Object> attributes) {
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
}
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
return new Genotype(g.getSampleName(), alleles, g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
}
/**
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() {
return alleles;
}
public List<Allele> getAlleles(Allele allele) {
List<Allele> al = new ArrayList<Allele>();
for ( Allele a : alleles )
if ( a.equals(allele) )
al.add(a);
return Collections.unmodifiableList(al);
}
public Allele getAllele(int i) {
if ( getType() == Type.UNAVAILABLE )
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
return alleles.get(i);
}
public boolean isPhased() { return isPhased; }
/**
* @return the ploidy of this genotype. 0 if the site is no-called.
*/
public int getPloidy() {
return alleles.size();
}
public enum Type {
NO_CALL,
HOM_REF,
HET,
HOM_VAR,
UNAVAILABLE,
MIXED // no-call and call in the same genotype
}
public Type getType() {
if ( type == null ) {
type = determineType();
}
return type;
}
protected Type determineType() {
if ( alleles.size() == 0 )
return Type.UNAVAILABLE;
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
for ( Allele allele : alleles ) {
if ( allele.isNoCall() )
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
sawMultipleAlleles = true;
}
if ( sawNoCall ) {
if ( observedAllele == null )
return Type.NO_CALL;
return Type.MIXED;
}
if ( observedAllele == null )
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
*/
public boolean isHom() { return isHomRef() || isHomVar(); }
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == Type.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == Type.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == Type.NO_CALL; }
/**
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
*/
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == Type.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
boolean isAvailable();
//
// Useful methods for getting genotype likelihoods for a genotype object, if present
//
public boolean hasLikelihoods() {
return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
(hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
}
boolean hasLikelihoods();
/**
* Convenience function that returns a string representation of the PL field of this
* genotype, or . if none is available.
*
* @return
*/
public String getLikelihoodsString() {
GenotypeLikelihoods gl = getLikelihoods();
return gl == null ? VCFConstants.MISSING_VALUE_v4 : gl.toString();
}
public GenotypeLikelihoods getLikelihoods() {
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
if ( x != null )
return x;
else {
x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false);
return x;
}
}
String getLikelihoodsString();
private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
Object x = getAttribute(key);
if ( x instanceof String ) {
if ( asPL )
return GenotypeLikelihoods.fromPLField((String)x);
else
return GenotypeLikelihoods.fromGLField((String)x);
}
else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x;
else return null;
}
GenotypeLikelihoods getLikelihoods();
public void validate() {
if ( alleles.size() == 0) return;
String getGenotypeString();
// int nNoCalls = 0;
for ( Allele allele : alleles ) {
if ( allele == null )
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
// nNoCalls += allele.isNoCall() ? 1 : 0;
}
String getGenotypeString(boolean ignoreRefState);
// Technically, the spec does allow for the below case so this is not an illegal state
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
}
String toBriefString();
public String getGenotypeString() {
return getGenotypeString(true);
}
public String getGenotypeString(boolean ignoreRefState) {
if ( alleles.size() == 0 )
return null;
// Notes:
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
}
private List<String> getAlleleStrings() {
List<String> al = new ArrayList<String>();
for ( Allele a : alleles )
al.add(a.getBaseString());
return al;
}
public String toString() {
int Q = getPhredScaledQual();
return String.format("[%s %s Q%s %s]", getSampleName(), getGenotypeString(false),
Q == -1 ? "." : String.format("%2d",Q), sortedString(getAttributes()));
}
public String toBriefString() {
return String.format("%s:Q%d", getGenotypeString(false), getPhredScaledQual());
}
public boolean sameGenotype(Genotype other) {
return sameGenotype(other, true);
}
public boolean sameGenotype(Genotype other, boolean ignorePhase) {
if (getPloidy() != other.getPloidy())
return false; // gotta have the same number of allele to be equal
// By default, compare the elements in the lists of alleles, element-by-element
Collection<Allele> thisAlleles = this.getAlleles();
Collection<Allele> otherAlleles = other.getAlleles();
if (ignorePhase) { // do not care about order, only identity of Alleles
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
otherAlleles = new TreeSet<Allele>(otherAlleles);
}
return thisAlleles.equals(otherAlleles);
}
/**
* a utility method for generating sorted strings from a map key set.
* @param c the map
* @param <T> the key type
* @param <V> the value type
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
*/
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);
List<String> pairs = new ArrayList<String>();
for (T k : t) {
pairs.add(k + "=" + c.get(k));
}
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
}
boolean sameGenotype(Genotype other);
boolean sameGenotype(Genotype other, boolean ignorePhase);
// ---------------------------------------------------------------------------------------------------------
//
//
// get routines to access context info fields
//
// ---------------------------------------------------------------------------------------------------------
public String getSampleName() { return commonInfo.getName(); }
public Set<String> getFilters() { return commonInfo.getFilters(); }
public Set<String> getFiltersMaybeNull() { return commonInfo.getFiltersMaybeNull(); }
public boolean isFiltered() { return commonInfo.isFiltered(); }
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
public double getLog10PError() { return commonInfo.getLog10PError(); }
String getSampleName();
/**
* Returns a phred-scaled quality score, or -1 if none is available
* @return
*/
public int getPhredScaledQual() {
final int i = (int)Math.round(commonInfo.getPhredScaledQual());
return i < 0 ? -1 : i;
}
public int[] getPL();
public boolean hasPL();
public Map<String, Object> getAttributes() { return commonInfo.getAttributes(); }
public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); }
public Object getAttribute(String key) { return commonInfo.getAttribute(key); }
public Object getAttribute(String key, Object defaultValue) {
return commonInfo.getAttribute(key, defaultValue);
}
public int getDP();
public boolean hasDP();
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
public int[] getAD();
public boolean hasAD();
/**
* comparable genotypes -> compareTo on the sample names
* @param genotype
* @return
*/
@Override
public int compareTo(final Genotype genotype) {
return getSampleName().compareTo(genotype.getSampleName());
}
public int getGQ();
public boolean hasGQ();
List<String> getFilters();
boolean isFiltered();
boolean filtersWereApplied();
@Deprecated
boolean hasLog10PError();
@Deprecated
double getLog10PError();
@Deprecated
int getPhredScaledQual();
public boolean hasAttribute(final String key);
Map<String, Object> getExtendedAttributes();
Object getAttribute(String key);
Object getAttribute(final String key, final Object defaultValue);
@Deprecated
String getAttributeAsString(String key, String defaultValue);
@Deprecated
int getAttributeAsInt(String key, int defaultValue);
@Deprecated
double getAttributeAsDouble(String key, double defaultValue);
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.variantcontext;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import java.util.*;
@ -36,6 +37,8 @@ import java.util.*;
* @author Mark DePristo
*/
public final class GenotypeBuilder {
public static boolean MAKE_FAST_BY_DEFAULT = false;
private String sampleName = null;
private List<Allele> alleles = null;
@ -46,8 +49,41 @@ public final class GenotypeBuilder {
private int[] PL = null;
private Map<String, Object> extendedAttributes = null;
private boolean useFast = MAKE_FAST_BY_DEFAULT;
private final static Map<String, Object> NO_ATTRIBUTES =
new HashMap<String, Object>(0);
Collections.unmodifiableMap(new HashMap<String, Object>(0));
// -----------------------------------------------------------------
//
// Factory methods
//
// -----------------------------------------------------------------
public final static Genotype create(final String sampleName, final List<Allele> alleles) {
return new GenotypeBuilder(sampleName, alleles).make();
}
public final static Genotype create(final String sampleName,
final List<Allele> alleles,
final Map<String, Object> attributes) {
return new GenotypeBuilder(sampleName, alleles).attributes(attributes).make();
}
protected final static Genotype create(final String sampleName,
final List<Allele> alleles,
final double[] gls) {
return new GenotypeBuilder(sampleName, alleles).PL(gls).make();
}
public final static Genotype create(final String sampleName,
final List<Allele> alleles,
final double log10Perror,
final Map<String, Object> attributes) {
return new GenotypeBuilder(sampleName, alleles)
.GQ(log10Perror == SlowGenotype.NO_LOG10_PERROR ? -1 : (int)(log10Perror * -10))
.attributes(attributes).make();
}
/**
* Create a empty builder. Both a sampleName and alleles must be provided
@ -78,7 +114,7 @@ public final class GenotypeBuilder {
* Create a new builder starting with the values in Genotype g
* @param g
*/
public GenotypeBuilder(final FastGenotype g) {
public GenotypeBuilder(final Genotype g) {
copy(g);
}
@ -87,7 +123,7 @@ public final class GenotypeBuilder {
* @param g
* @return
*/
public GenotypeBuilder copy(final FastGenotype g) {
public GenotypeBuilder copy(final Genotype g) {
name(g.getSampleName());
alleles(g.getAlleles());
phased(g.isPhased());
@ -125,9 +161,31 @@ public final class GenotypeBuilder {
* @return a newly minted Genotype object with values provided from this builder
*/
@Ensures({"result != null"})
public FastGenotype make() {
final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, ea);
public Genotype make() {
if ( useFast ) {
final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, ea);
} else {
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
if ( extendedAttributes != null ) attributes.putAll(extendedAttributes);
final double log10PError = GQ != -1 ? GQ / -10.0 : SlowGenotype.NO_LOG10_PERROR;
Set<String> filters = Collections.emptySet();
if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) ) {
filters = new LinkedHashSet<String>((List<String>)extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY));
attributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
}
if ( DP != -1 ) attributes.put(VCFConstants.DEPTH_KEY, DP);
if ( AD != null ) attributes.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD);
final double[] log10likelihoods = PL != null ? GenotypeLikelihoods.fromPLs(PL).getAsVector() : null;
return new SlowGenotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10likelihoods);
}
}
public GenotypeBuilder useFast(boolean useFast) {
this.useFast = useFast;
return this;
}
@Requires({"sampleName != null"})
@ -152,12 +210,21 @@ public final class GenotypeBuilder {
}
@Requires({"GQ >= -1"})
@Ensures({"this.GQ == GQ"})
@Ensures({"this.GQ == GQ", "this.GQ >= -1"})
public GenotypeBuilder GQ(final int GQ) {
this.GQ = GQ;
return this;
}
public GenotypeBuilder GQ(final double GQ) {
return GQ((int)Math.round(GQ));
}
public GenotypeBuilder noGQ() { GQ = -1; return this; }
public GenotypeBuilder noAD() { AD = null; return this; }
public GenotypeBuilder noDP() { DP = -1; return this; }
public GenotypeBuilder noPL() { PL = null; return this; }
@Requires({"DP >= -1"})
@Ensures({"this.DP == DP"})
public GenotypeBuilder DP(final int DP) {
@ -179,8 +246,15 @@ public final class GenotypeBuilder {
return this;
}
@Requires("PL == null || PL.length > 0")
@Ensures({"this.PL == PL"})
public GenotypeBuilder PL(final double[] GLs) {
this.PL = GenotypeLikelihoods.fromLog10Likelihoods(GLs).getAsPLs();
return this;
}
@Requires("attributes != null")
@Ensures("extendedAttributes != null")
@Ensures("attributes.isEmpty() || extendedAttributes != null")
public GenotypeBuilder attributes(final Map<String, Object> attributes) {
for ( Map.Entry<String, Object> pair : attributes.entrySet() )
attribute(pair.getKey(), pair.getValue());
@ -195,4 +269,15 @@ public final class GenotypeBuilder {
extendedAttributes.put(key, value);
return this;
}
public GenotypeBuilder filters(final List<String> filters) {
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
// TODO
return this;
}
public GenotypeBuilder maxAttributes(final int i) {
// TODO
return this;
}
}

View File

@ -122,25 +122,25 @@ public class GenotypeLikelihoods {
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
//Returns null in case of missing likelihoods
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
public EnumMap<GenotypeType,Double> getAsMap(boolean normalizeFromLog10){
//Make sure that the log10likelihoods are set
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
if(likelihoods == null)
return null;
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[GenotypeType.HOM_REF.ordinal()-1]);
likelihoodsMap.put(GenotypeType.HET,likelihoods[GenotypeType.HET.ordinal()-1]);
likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[GenotypeType.HOM_VAR.ordinal() - 1]);
return likelihoodsMap;
}
//Return the neg log10 Genotype Quality (GQ) for the given genotype
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
public double getLog10GQ(Genotype.Type genotype){
return getQualFromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
public double getLog10GQ(GenotypeType genotype){
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
}
public static double getQualFromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
if(likelihoods == null)
return Double.NEGATIVE_INFINITY;

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext;
/**
* Summary types for Genotype objects
*
* @author Your Name
* @since Date created
*/
public enum GenotypeType {
/** The sample is no-called (all alleles are NO_CALL */
NO_CALL,
/** The sample is homozygous reference */
HOM_REF,
/** The sample is heterozygous, with at least one ref and at least one one alt in any order */
HET,
/** All alleles are non-reference */
HOM_VAR,
/** There is no allele data availble for this sample (alleles.isEmpty) */
UNAVAILABLE,
/** Some chromosomes are NO_CALL and others are called */
MIXED // no-call and call in the same genotype
}

View File

@ -272,6 +272,17 @@ public class GenotypesContext implements List<Genotype> {
}
}
// ---------------------------------------------------------------------------
//
// Lazy methods
//
// ---------------------------------------------------------------------------
public boolean isLazyWithData() {
return this instanceof LazyGenotypesContext &&
((LazyGenotypesContext)this).getUnparsedGenotypeData() != null;
}
// ---------------------------------------------------------------------------
//
// Map methods

View File

@ -0,0 +1,445 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
/**
* This class encompasses all the basic information about a genotype. It is immutable.
*
* @author Mark DePristo
*/
public class SlowGenotype implements Genotype {
public final static String PHASED_ALLELE_SEPARATOR = "|";
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
protected CommonInfo commonInfo;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
protected List<Allele> alleles = null; // new ArrayList<Allele>();
protected GenotypeType type = null;
protected boolean isPhased = false;
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased) {
// this(sampleName, alleles, log10PError, filters, attributes, isPhased, null);
// }
protected SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
if ( alleles == null || alleles.isEmpty() )
this.alleles = Collections.emptyList();
else
this.alleles = Collections.unmodifiableList(alleles);
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
if ( log10Likelihoods != null )
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
this.isPhased = isPhased;
validate();
}
// /**
// * Creates a new Genotype for sampleName with genotype according to alleles.
// * @param sampleName
// * @param alleles
// * @param log10PError the confidence in these alleles
// * @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
// */
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, double[] log10Likelihoods) {
// this(sampleName, alleles, log10PError, null, null, false, log10Likelihoods);
// }
//
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError) {
// this(sampleName, alleles, log10PError, null, null, false);
// }
//
// public SlowGenotype(String sampleName, List<Allele> alleles) {
// this(sampleName, alleles, NO_LOG10_PERROR, null, null, false);
// }
//
// public SlowGenotype(String sampleName, SlowGenotype parent) {
// this(sampleName, parent.getAlleles(), parent.getLog10PError(), parent.getFilters(), parent.getAttributes(), parent.isPhased());
// }
//
//
//
// // ---------------------------------------------------------------------------------------------------------
// //
// // Partial-cloning routines (because Genotype is immutable).
// //
// // ---------------------------------------------------------------------------------------------------------
//
// public static SlowGenotype modifyName(SlowGenotype g, String name) {
// return new SlowGenotype(name, g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
// }
//
// public static SlowGenotype modifyAttributes(SlowGenotype g, Map<String, Object> attributes) {
// return new SlowGenotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
// }
//
// public static SlowGenotype modifyAlleles(SlowGenotype g, List<Allele> alleles) {
// return new SlowGenotype(g.getSampleName(), alleles, g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
// }
/**
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() {
return alleles;
}
public int countAllele(final Allele allele) {
int c = 0;
for ( final Allele a : alleles )
if ( a.equals(allele) )
c++;
return c;
}
public Allele getAllele(int i) {
if ( getType() == GenotypeType.UNAVAILABLE )
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
return alleles.get(i);
}
public boolean isPhased() { return isPhased; }
/**
* @return the ploidy of this genotype. 0 if the site is no-called.
*/
public int getPloidy() {
return alleles.size();
}
public GenotypeType getType() {
if ( type == null ) {
type = determineType();
}
return type;
}
protected GenotypeType determineType() {
if ( alleles.size() == 0 )
return GenotypeType.UNAVAILABLE;
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
for ( Allele allele : alleles ) {
if ( allele.isNoCall() )
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
sawMultipleAlleles = true;
}
if ( sawNoCall ) {
if ( observedAllele == null )
return GenotypeType.NO_CALL;
return GenotypeType.MIXED;
}
if ( observedAllele == null )
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
*/
public boolean isHom() { return isHomRef() || isHomVar(); }
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == GenotypeType.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
/**
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
*/
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
//
// Useful methods for getting genotype likelihoods for a genotype object, if present
//
public boolean hasLikelihoods() {
return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
(hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
}
/**
* Convenience function that returns a string representation of the PL field of this
* genotype, or . if none is available.
*
* @return
*/
public String getLikelihoodsString() {
GenotypeLikelihoods gl = getLikelihoods();
return gl == null ? VCFConstants.MISSING_VALUE_v4 : gl.toString();
}
public GenotypeLikelihoods getLikelihoods() {
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
if ( x != null )
return x;
else {
x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false);
return x;
}
}
private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
Object x = getAttribute(key);
if ( x instanceof String ) {
if ( asPL )
return GenotypeLikelihoods.fromPLField((String)x);
else
return GenotypeLikelihoods.fromGLField((String)x);
}
else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x;
else return null;
}
public void validate() {
if ( alleles.size() == 0) return;
// int nNoCalls = 0;
for ( Allele allele : alleles ) {
if ( allele == null )
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
// nNoCalls += allele.isNoCall() ? 1 : 0;
}
// Technically, the spec does allow for the below case so this is not an illegal state
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
}
public String getGenotypeString() {
return getGenotypeString(true);
}
public String getGenotypeString(boolean ignoreRefState) {
if ( alleles.size() == 0 )
return null;
// Notes:
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
}
private List<String> getAlleleStrings() {
List<String> al = new ArrayList<String>();
for ( Allele a : alleles )
al.add(a.getBaseString());
return al;
}
public String toString() {
int Q = getPhredScaledQual();
return String.format("[%s %s Q%s %s]", getSampleName(), getGenotypeString(false),
Q == -1 ? "." : String.format("%2d",Q), sortedString(getExtendedAttributes()));
}
public String toBriefString() {
return String.format("%s:Q%d", getGenotypeString(false), getPhredScaledQual());
}
public boolean sameGenotype(Genotype other) {
return sameGenotype(other, true);
}
public boolean sameGenotype(Genotype other, boolean ignorePhase) {
if (getPloidy() != other.getPloidy())
return false; // gotta have the same number of allele to be equal
// By default, compare the elements in the lists of alleles, element-by-element
Collection<Allele> thisAlleles = this.getAlleles();
Collection<Allele> otherAlleles = other.getAlleles();
if (ignorePhase) { // do not care about order, only identity of Alleles
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
otherAlleles = new TreeSet<Allele>(otherAlleles);
}
return thisAlleles.equals(otherAlleles);
}
/**
* a utility method for generating sorted strings from a map key set.
* @param c the map
* @param <T> the key type
* @param <V> the value type
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
*/
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);
List<String> pairs = new ArrayList<String>();
for (T k : t) {
pairs.add(k + "=" + c.get(k));
}
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
}
// ---------------------------------------------------------------------------------------------------------
//
// get routines to access context info fields
//
// ---------------------------------------------------------------------------------------------------------
public String getSampleName() { return commonInfo.getName(); }
public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
public boolean isFiltered() { return commonInfo.isFiltered(); }
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
public double getLog10PError() { return commonInfo.getLog10PError(); }
/**
* Returns a phred-scaled quality score, or -1 if none is available
* @return
*/
public int getPhredScaledQual() {
if ( commonInfo.hasLog10PError() )
return (int)Math.round(commonInfo.getPhredScaledQual());
else
return -1;
}
@Override
public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); }
@Override
public Object getAttribute(String key) { return commonInfo.getAttribute(key); }
public Object getAttribute(String key, Object defaultValue) {
return commonInfo.getAttribute(key, defaultValue);
}
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
/**
* comparable genotypes -> compareTo on the sample names
* @param genotype
* @return
*/
@Override
public int compareTo(final Genotype genotype) {
return getSampleName().compareTo(genotype.getSampleName());
}
@Override
public int[] getPL() {
return hasPL() ? getLikelihoods().getAsPLs() : null;
}
@Override
public boolean hasPL() {
return hasLikelihoods();
}
@Override
public int getDP() {
return getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
}
@Override
public boolean hasDP() {
return hasAttribute(VCFConstants.DEPTH_KEY);
}
@Override
public int[] getAD() {
if ( hasAD() ) {
return (int[])getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
} else
return null;
}
@Override
public boolean hasAD() {
return hasAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
}
@Override
public int getGQ() {
return getPhredScaledQual();
}
@Override
public boolean hasGQ() {
return hasLikelihoods();
}
@Override
public Map<String, Object> getExtendedAttributes() {
final Map<String, Object> ea = new LinkedHashMap<String, Object>(commonInfo.getAttributes());
for ( final String primary : FastGenotype.PRIMARY_KEYS )
ea.remove(primary);
return ea;
}
}

View File

@ -327,19 +327,36 @@ public class VariantContext implements Feature { // to enable tribble integratio
//
// ---------------------------------------------------------------------------------------------------------
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
VariantContextBuilder builder = new VariantContextBuilder(this);
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make();
}
/**
* This method subsets down to a set of samples.
*
* At the same time returns the alleles to just those in use by the samples,
* if rederiveAllelesFromGenotypes is true, otherwise the full set of alleles
* in this VC is returned as the set of alleles in the subContext, even if
* some of those alleles aren't in the samples
*
* @param sampleNames
* @return
*/
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) {
return this; // fast path when you don't have any work to do
} else {
VariantContextBuilder builder = new VariantContextBuilder(this);
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
public VariantContext subContextFromSamples(Set<String> sampleNames) {
VariantContextBuilder builder = new VariantContextBuilder(this);
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make();
if ( rederiveAllelesFromGenotypes )
builder.alleles(allelesOfGenotypes(newGenotypes));
else {
builder.alleles(alleles);
}
return builder.genotypes(newGenotypes).make();
}
}
public VariantContext subContextFromSample(String sampleName) {
return subContextFromSamples(Collections.singleton(sampleName));
return subContextFromSamples(Collections.singleton(sampleName), true);
}
/**
@ -892,7 +909,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
for ( final Genotype g : genotypes ) {
n += g.getAlleles(a).size();
n += g.countAllele(a);
}
return n;
@ -922,7 +939,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
private void calculateGenotypeCounts() {
if ( genotypeCounts == null ) {
genotypeCounts = new int[Genotype.Type.values().length];
genotypeCounts = new int[GenotypeType.values().length];
for ( final Genotype g : getGenotypes() ) {
genotypeCounts[g.getType().ordinal()]++;
@ -937,7 +954,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public int getNoCallCount() {
calculateGenotypeCounts();
return genotypeCounts[Genotype.Type.NO_CALL.ordinal()];
return genotypeCounts[GenotypeType.NO_CALL.ordinal()];
}
/**
@ -947,7 +964,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public int getHomRefCount() {
calculateGenotypeCounts();
return genotypeCounts[Genotype.Type.HOM_REF.ordinal()];
return genotypeCounts[GenotypeType.HOM_REF.ordinal()];
}
/**
@ -957,7 +974,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public int getHetCount() {
calculateGenotypeCounts();
return genotypeCounts[Genotype.Type.HET.ordinal()];
return genotypeCounts[GenotypeType.HET.ordinal()];
}
/**
@ -967,7 +984,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public int getHomVarCount() {
calculateGenotypeCounts();
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
return genotypeCounts[GenotypeType.HOM_VAR.ordinal()];
}
/**
@ -977,7 +994,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public int getMixedCount() {
calculateGenotypeCounts();
return genotypeCounts[Genotype.Type.MIXED.ordinal()];
return genotypeCounts[GenotypeType.MIXED.ordinal()];
}
// ---------------------------------------------------------------------------------------------------------
@ -1412,8 +1429,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) {
final Map<String, Object> map = fullyDecodeAttributes(g.getAttributes(), header);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), map, g.isPhased());
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header);
return new GenotypeBuilder(g).attributes(map).make();
}
// ---------------------------------------------------------------------------------------------------------

View File

@ -99,7 +99,7 @@ public class VariantContextUtils {
// if there are alternate alleles, record the relevant tags
if ( vc.getAlternateAlleles().size() > 0 ) {
ArrayList<String> alleleFreqs = new ArrayList<String>();
ArrayList<Double> alleleFreqs = new ArrayList<Double>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
ArrayList<Integer> foundersAlleleCounts = new ArrayList<Integer>();
double totalFoundersChromosomes = (double)vc.getCalledChrCount(founderIds);
@ -109,10 +109,10 @@ public class VariantContextUtils {
alleleCounts.add(vc.getCalledChrCount(allele));
foundersAlleleCounts.add(foundersAltChromosomes);
if ( AN == 0 ) {
alleleFreqs.add("0.0");
alleleFreqs.add(0.0);
} else {
// todo -- this is a performance problem
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes));
final Double freq = Double.valueOf(String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes)));
alleleFreqs.add(freq);
}
}
@ -167,10 +167,10 @@ public class VariantContextUtils {
}
public static Genotype removePLs(Genotype g) {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
if ( g.hasLikelihoods() )
return new GenotypeBuilder(g).noPL().make();
else
return g;
}
/**
@ -257,8 +257,7 @@ public class VariantContextUtils {
newGenotypeAlleles.add(Allele.NO_CALL);
}
}
genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getLog10PError(),
g.getFilters(), g.getAttributes(), g.isPhased()));
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
}
@ -475,9 +474,9 @@ public class VariantContextUtils {
// Genotypes
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(),
genotypeAttributes, g.isPhased()));
// TODO -- fixme
//Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
//genotypes.add(new GenotypeBuilder(g).attributes(genotypeAttributes).make());
}
return builder.genotypes(genotypes).attributes(attributes);
@ -833,7 +832,7 @@ public class VariantContextUtils {
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
@ -878,7 +877,7 @@ public class VariantContextUtils {
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
@ -1073,7 +1072,7 @@ public class VariantContextUtils {
if ( uniqifySamples || alleleMapping.needsRemapping() ) {
final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
newG = new Genotype(name, alleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased());
newG = new GenotypeBuilder(g).name(name).alleles(alleles).make();
}
mergedGenotypes.add(newG);
@ -1113,7 +1112,7 @@ public class VariantContextUtils {
newAllele = Allele.NO_CALL;
newAlleles.add(newAllele);
}
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
}
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
@ -1126,11 +1125,11 @@ public class VariantContextUtils {
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
Map<String, Object> attrs = new HashMap<String, Object>();
for ( Map.Entry<String, Object> attr : genotype.getAttributes().entrySet() ) {
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
if ( allowedAttributes.contains(attr.getKey()) )
attrs.put(attr.getKey(), attr.getValue());
}
newGenotypes.add(Genotype.modifyAttributes(genotype, attrs));
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
}
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
@ -1247,7 +1246,7 @@ public class VariantContextUtils {
for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k));
if ( !g.hasLikelihoods() ) {
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
continue;
}
@ -1268,51 +1267,35 @@ public class VariantContextUtils {
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
}
else {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
final GenotypeBuilder gb = new GenotypeBuilder(g);
if ( numNewAltAlleles == 0 )
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
gb.noPL();
else
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
else
newGTs.add(assignDiploidGenotype(g, newLikelihoods, allelesToUse, attrs));
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
gb.alleles(NO_CALL_ALLELES);
}
else {
// find the genotype with maximum likelihoods
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
if ( numNewAltAlleles != 0 ) gb.GQ(-10 * GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
}
newGTs.add(gb.make());
}
}
return newGTs;
}
/**
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
*
* @param originalGT the original genotype
* @param newLikelihoods the PL array
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
* @param attrs the annotations to use when creating the genotype
*
* @return genotype
*/
private static Genotype assignDiploidGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
final int numNewAltAlleles = allelesToUse.size() - 1;
// find the genotype with maximum likelihoods
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
myAlleles.add(allelesToUse.get(alleles.alleleIndex1));
myAlleles.add(allelesToUse.get(alleles.alleleIndex2));
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
}
/**
* Returns true iff VC is an non-complex indel where every allele represents an expansion or
* contraction of a series of identical bases in the reference.

View File

@ -193,7 +193,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
infoMap.put("isHet", g.isHet() ? "1" : "0");
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getPhredScaledQual());
for ( Map.Entry<String, Object> e : g.getAttributes().entrySet() ) {
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
infoMap.put(e.getKey(), e.getValue());
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
@ -38,6 +39,7 @@ import java.io.*;
import java.util.*;
class BCF2Writer extends IndexingVariantContextWriter {
private final static boolean FULLY_DECODE = true;
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
@ -47,6 +49,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
private final boolean doNotWriteGenotypes;
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
@ -100,7 +103,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
@Override
public void add( final VariantContext initialVC ) {
final VariantContext vc = initialVC.fullyDecode(header);
final VariantContext vc = FULLY_DECODE ? initialVC.fullyDecode(header) : initialVC;
super.add(vc); // allow on the fly indexing
try {
@ -162,7 +165,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
// info fields
final int nAlleles = vc.getNAlleles();
final int nInfo = vc.getAttributes().size();
final int nGenotypeFormatFields = VCFWriter.calcVCFGenotypeKeys(vc, header).size();
final int nGenotypeFormatFields = getNGenotypeFormatFields(vc);
final int nSamples = vc.getNSamples();
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x00FF), BCF2Type.INT32);
@ -176,6 +179,30 @@ class BCF2Writer extends IndexingVariantContextWriter {
return encoder.getRecordBytes();
}
private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
if ( vc.getGenotypes().isLazyWithData() ) {
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
}
return null;
}
/**
* Try to get the nGenotypeFields as efficiently as possible.
*
* If this is a lazy BCF2 object just grab the field count from there,
* otherwise do the whole counting by types test in the actual data
*
* @param vc
* @return
*/
private final int getNGenotypeFormatFields(final VariantContext vc) {
final BCF2Codec.LazyData lazyData = getLazyData(vc);
return lazyData != null ? lazyData.nGenotypeFields : VCFWriter.calcVCFGenotypeKeys(vc, header).size();
}
private void buildID( VariantContext vc ) throws IOException {
encoder.encodeTyped(vc.getID(), BCF2Type.CHAR);
}
@ -206,86 +233,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
}
}
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
for ( final String field : genotypeFields ) {
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
addGenotypes(vc);
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
addGQ(vc);
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
addGenotypeFilters(vc);
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
addPLs(vc);
} else {
addGenericGenotypeField(vc, field);
}
}
// --------------------------------------------------------------------------------
//
// Type matching routines between VCF and BCF
//
// --------------------------------------------------------------------------------
return encoder.getRecordBytes();
}
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
assert metaData != null; // field is supposed to be in header
int nFields = metaData.getCount(vc.getNAlleles() - 1);
if ( nFields == -1 ) { // unbounded, need to look at values
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
} else {
return nFields;
}
}
private final int computeMaxSizeOfGenotypeFieldFromValues(final String field, final VariantContext vc) {
int size = -1;
final GenotypesContext gc = vc.getGenotypes();
for ( final Genotype g : gc ) {
final Object o = g.getAttribute(field);
if ( o == null ) continue;
if ( o instanceof List ) {
// only do compute if first value is of type list
size = Math.max(size, ((List)o).size());
} else if ( size == -1 )
size = 1;
}
return size;
}
private final void addGenericGenotypeField(final VariantContext vc, final String field) throws IOException {
final int numInFormatField = getNGenotypeFieldValues(field, vc);
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(field, null);
startGenotypeField(field, numInFormatField, encoding.BCF2Type);
for ( final String name : header.getGenotypeSamples() ) {
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
try {
final Object fieldValue = g.getAttribute(field);
if ( numInFormatField == 1 ) {
// we encode the actual allele, encodeRawValue handles the missing case where fieldValue == null
encoder.encodeRawValue(fieldValue, encoding.BCF2Type);
} else {
// multiple values, need to handle general case
final List<Object> asList = toList(fieldValue);
final int nSampleValues = asList.size();
for ( int i = 0; i < numInFormatField; i++ ) {
encoder.encodeRawValue(i < nSampleValues ? asList.get(i) : null, encoding.BCF2Type);
}
}
} catch ( ClassCastException e ) {
throw new ReviewedStingException("Value stored in VariantContext incompatible with VCF header type for field " + field, e);
}
}
}
private final static List<Object> toList(final Object o) {
if ( o == null ) return Collections.emptyList();
else if ( o instanceof List ) return (List<Object>)o;
else return Collections.singletonList(o);
}
private final class VCFToBCFEncoding {
VCFHeaderLineType vcfType;
@ -356,40 +309,116 @@ class BCF2Writer extends IndexingVariantContextWriter {
// }
}
private final void addGQ(final VariantContext vc) throws IOException {
startGenotypeField(VCFConstants.GENOTYPE_QUALITY_KEY, 1, BCF2Type.INT8);
// --------------------------------------------------------------------------------
//
// Genotype field encoding
//
// --------------------------------------------------------------------------------
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
BCF2Codec.LazyData lazyData = getLazyData(vc);
if ( lazyData != null ) {
logger.info("Lazy decoded data detected, writing bytes directly to stream. N = " + lazyData.bytes.length);
return lazyData.bytes;
} else {
final GenotypesContext gc = vc.getGenotypes();
// we have to do work to convert the VC into a BCF2 byte stream
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
for ( final String field : genotypeFields ) {
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
addGenotypes(vc);
} else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
addIntGenotypeField(field, intGenotypeFieldAccessors.getAccessor(field), vc);
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
addGenotypeFilters(vc);
} else {
addGenericGenotypeField(vc, field);
}
}
return encoder.getRecordBytes();
}
}
private final int getNGenotypeFieldValues(final String field, final VariantContext vc, final IntGenotypeFieldAccessors.Accessor ige) {
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
assert metaData != null; // field is supposed to be in header
int nFields = metaData.getCount(vc.getNAlleles() - 1);
if ( nFields == -1 ) { // unbounded, need to look at values
return computeMaxSizeOfGenotypeFieldFromValues(field, vc, ige);
} else {
return nFields;
}
}
private final int computeMaxSizeOfGenotypeFieldFromValues(final String field, final VariantContext vc, final IntGenotypeFieldAccessors.Accessor ige) {
int size = -1;
final GenotypesContext gc = vc.getGenotypes();
if ( ige == null ) {
for ( final Genotype g : gc ) {
final Object o = g.getAttribute(field);
if ( o == null ) continue;
if ( o instanceof List ) {
// only do compute if first value is of type list
size = Math.max(size, ((List)o).size());
} else if ( size == -1 )
size = 1;
}
} else {
for ( final Genotype g : gc ) {
size = Math.max(size, ige.getSize(g));
}
}
return size;
}
private final void addGenericGenotypeField(final VariantContext vc, final String field) throws IOException {
final int numInFormatField = getNGenotypeFieldValues(field, vc, null);
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(field, null);
startGenotypeField(field, numInFormatField, encoding.BCF2Type);
for ( final String name : header.getGenotypeSamples() ) {
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
if ( g.hasLog10PError() ) {
final int GQ = Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL);
if ( GQ > VCFConstants.MAX_GENOTYPE_QUAL ) throw new ReviewedStingException("Unexpectedly large GQ " + GQ + " at " + vc);
encoder.encodeRawValue(GQ, BCF2Type.INT8);
} else {
encoder.encodeRawMissingValues(1, BCF2Type.INT8);
try {
final Object fieldValue = g.getAttribute(field);
if ( numInFormatField == 1 ) {
// we encode the actual allele, encodeRawValue handles the missing case where fieldValue == null
encoder.encodeRawValue(fieldValue, encoding.BCF2Type);
} else {
// multiple values, need to handle general case
final List<Object> asList = toList(fieldValue);
final int nSampleValues = asList.size();
for ( int i = 0; i < numInFormatField; i++ ) {
encoder.encodeRawValue(i < nSampleValues ? asList.get(i) : null, encoding.BCF2Type);
}
}
} catch ( ClassCastException e ) {
throw new ReviewedStingException("Value stored in VariantContext incompatible with VCF header type for field " + field, e);
}
}
}
/**
* Horrible special case to deal with the GenotypeLikelihoods class
* @param vc
* @throws IOException
*/
private final void addPLs(final VariantContext vc) throws IOException {
final String field = VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY;
final int numPLs = getNGenotypeFieldValues(field, vc);
private final void addIntGenotypeField(final String field,
final IntGenotypeFieldAccessors.Accessor ige,
final VariantContext vc) throws IOException {
final int numPLs = getNGenotypeFieldValues(field, vc, ige);
final int[] allPLs = new int[numPLs * vc.getNSamples()];
// collect all of the PLs into a single vector of values
int i = 0;
for ( final String name : header.getGenotypeSamples() ) {
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
final GenotypeLikelihoods gls = g.getLikelihoods();
final int[] pls = gls != null ? g.getLikelihoods().getAsPLs() : null;
final int[] pls = ige.getValues(g);
if ( pls == null )
for ( int j = 0; j < numPLs; j++) allPLs[i++] = -1;
else
else {
assert pls.length == numPLs;
for ( int pl : pls ) allPLs[i++] = pl;
}
}
// determine the best size
@ -427,6 +456,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
}
}
// --------------------------------------------------------------------------------
//
// Low-level block encoding
//
// --------------------------------------------------------------------------------
/**
* Write the data in the encoder to the outputstream as a length encoded
* block of data. After this call the encoder stream will be ready to
@ -488,4 +523,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
encodeStringByRef(key);
encoder.encodeType(size, valueType);
}
private final static List<Object> toList(final Object o) {
if ( o == null ) return Collections.emptyList();
else if ( o instanceof List ) return (List<Object>)o;
else return Collections.singletonList(o);
}
}

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext.writer;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.HashMap;
/**
* A convenient way to provide a single view on the many int and int[] field values we work with,
* for writing out the values. This class makes writing out the inline AD, GQ, PL, DP fields
* easy and fast
*
* @author Mark DePristo
* @since 6/12
*/
class IntGenotypeFieldAccessors {
// initialized once per writer to allow parallel writers to work
private final HashMap<String, Accessor> intGenotypeFieldEncoders =
new HashMap<String, Accessor>();
public IntGenotypeFieldAccessors() {
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new IntGenotypeFieldAccessors.ADAccessor());
intGenotypeFieldEncoders.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new IntGenotypeFieldAccessors.PLAccessor());
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_QUALITY_KEY, new IntGenotypeFieldAccessors.GQAccessor());
}
/**
* Return an accessor for field, or null if none exists
* @param field
* @return
*/
public Accessor getAccessor(final String field) {
return intGenotypeFieldEncoders.get(field);
}
public static abstract class Accessor {
public abstract int[] getValues(final Genotype g);
public int getSize(final Genotype g) {
final int[] v = getValues(g);
return v == null ? 0 : v.length;
}
}
private static abstract class AtomicAccessor extends Accessor {
private final int[] singleton = new int[1];
@Override
public int[] getValues(final Genotype g) {
singleton[0] = getValue(g);
return singleton[0] == -1 ? null : singleton;
}
public abstract int getValue(final Genotype g);
}
public static class GQAccessor extends AtomicAccessor {
@Override public int getValue(final Genotype g) { return Math.min(g.getGQ(), VCFConstants.MAX_GENOTYPE_QUAL); }
}
public static class DPAccessor extends AtomicAccessor {
@Override public int getValue(final Genotype g) { return g.getDP(); }
}
public static class ADAccessor extends Accessor {
@Override public int[] getValues(final Genotype g) { return g.getAD(); }
}
public static class PLAccessor extends Accessor {
@Override public int[] getValues(final Genotype g) { return g.getPL(); }
}
}

View File

@ -53,28 +53,7 @@ class VCFWriter extends IndexingVariantContextWriter {
// were filters applied?
protected boolean filtersWereAppliedToContext = false;
// /**
// * create a VCF writer, given a file to write to
// *
// * @param location the file location to write to
// */
// public StandardVCFWriter(final File location, final SAMSequenceDictionary refDict) {
// this(location, openOutputStream(location), refDict, true, false);
// }
//
// public StandardVCFWriter(File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing) {
// this(location, openOutputStream(location), refDict, enableOnTheFlyIndexing, false);
// }
//
// /**
// * create a VCF writer, given a stream to write to
// *
// * @param output the file location to write to
// * @param doNotWriteGenotypes do not write genotypes
// */
// public StandardVCFWriter(final OutputStream output, final SAMSequenceDictionary refDict, final boolean doNotWriteGenotypes) {
// this(null, output, refDict, false, doNotWriteGenotypes);
// }
private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
@ -250,7 +229,7 @@ class VCFWriter extends IndexingVariantContextWriter {
// FORMAT
final GenotypesContext gc = vc.getGenotypes();
if ( gc instanceof LazyGenotypesContext && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() != null) {
if ( gc.isLazyWithData() && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() instanceof String ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
mWriter.write(((LazyGenotypesContext)gc).getUnparsedGenotypeData().toString());
} else {
@ -360,9 +339,9 @@ class VCFWriter extends IndexingVariantContextWriter {
}
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
for ( String key : genotypeFormatKeys ) {
for ( String field : genotypeFormatKeys ) {
if ( key.equals(VCFConstants.GENOTYPE_KEY) ) {
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
if ( !g.isAvailable() ) {
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
}
@ -376,36 +355,50 @@ class VCFWriter extends IndexingVariantContextWriter {
continue;
}
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
// some exceptions
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
if ( ! g.hasLog10PError() )
val = VCFConstants.MISSING_VALUE_v4;
String outputValue;
final IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.getAccessor(field);
if ( accessor != null ) {
final int[] intValues = accessor.getValues(g);
if ( intValues == null )
outputValue = VCFConstants.MISSING_VALUE_v4;
else if ( intValues.length == 1 ) // fast path
outputValue = Integer.toString(intValues[0]);
else {
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
}
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
}
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
if ( metaData != null ) {
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
// For example, if Number=2, the string has to be ".,."
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
for ( int i = 1; i < numInFormatField; i++ ) {
StringBuilder sb = new StringBuilder();
sb.append(intValues[0]);
for ( int i = 1; i < intValues.length; i++) {
sb.append(",");
sb.append(VCFConstants.MISSING_VALUE_v4);
sb.append(intValues[i]);
}
val = sb.toString();
outputValue = sb.toString();
}
} else {
Object val = g.hasAttribute(field) ? g.getAttribute(field) : VCFConstants.MISSING_VALUE_v4;
// some exceptions
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
}
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
if ( metaData != null ) {
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
// For example, if Number=2, the string has to be ".,."
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
for ( int i = 1; i < numInFormatField; i++ ) {
sb.append(",");
sb.append(VCFConstants.MISSING_VALUE_v4);
}
val = sb.toString();
}
}
// assume that if key is absent, then the given string encoding suffices
outputValue = formatVCFField(val);
}
// assume that if key is absent, then the given string encoding suffices
String outputValue = formatVCFField(val);
if ( outputValue != null )
attrs.add(outputValue);
}
@ -475,21 +468,24 @@ class VCFWriter extends IndexingVariantContextWriter {
boolean sawGoodGT = false;
boolean sawGoodQual = false;
boolean sawGenotypeFilter = false;
boolean sawDP = false;
boolean sawAD = false;
boolean sawPL = false;
for ( final Genotype g : vc.getGenotypes() ) {
keys.addAll(g.getAttributes().keySet());
if ( g.isAvailable() )
sawGoodGT = true;
if ( g.hasLog10PError() )
sawGoodQual = true;
if (g.isFiltered() && g.isCalled())
sawGenotypeFilter = true;
keys.addAll(g.getExtendedAttributes().keySet());
if ( g.isAvailable() ) sawGoodGT = true;
if ( g.hasGQ() ) sawGoodQual = true;
if ( g.hasDP() ) sawDP = true;
if ( g.hasAD() ) sawAD = true;
if ( g.hasPL() ) sawPL = true;
if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true;
}
if ( sawGoodQual )
keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
if (sawGenotypeFilter)
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
if ( sawDP ) keys.add(VCFConstants.DEPTH_KEY);
if ( sawAD ) keys.add(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
if ( sawPL ) keys.add(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
if ( sawGenotypeFilter ) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
@ -50,7 +51,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
}
private static Genotype createGenotype(String name, double[] gls) {
return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls);
return new GenotypeBuilder(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(gls).make();
}
@DataProvider(name = "getGLs")

View File

@ -10,128 +10,128 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s --no_cmdline_in_header" + args;
}
@Test
public void testComplexSelection() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1,
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
);
spec.disableShadowBCF();
executeTest("testComplexSelection--" + testfile, spec);
}
@Test
public void testSampleExclusion() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
1,
Arrays.asList("bbd7b28d1c5701e17b395d64f8b6f13d")
);
spec.disableShadowBCF();
executeTest("testSampleExclusion--" + testfile, spec);
}
@Test
public void testRepeatedLineSelection() {
String testfile = testDir + "test.dup.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
1,
Arrays.asList("77579c53dbde4e8171f3cee83b98351b")
);
executeTest("testRepeatedLineSelection--" + testfile, spec);
}
@Test
public void testDiscordance() {
String testFile = testDir + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("03abdc27bfd7aa36d57bba0325b31e0d")
);
spec.disableShadowBCF();
executeTest("testDiscordance--" + testFile, spec);
}
@Test
public void testDiscordanceNoSampleSpecified() {
String testFile = testDir + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("9fb54ed003234a5847c565ffb6767b95")
);
spec.disableShadowBCF();
executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec);
}
@Test
public void testConcordance() {
String testFile = testDir + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("76857b016198c3e08a2e27bbdb49f3f0")
);
spec.disableShadowBCF();
executeTest("testConcordance--" + testFile, spec);
}
@Test
public void testVariantTypeSelection() {
String testFile = testDir + "complexExample1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("6c0b0c5f03d26f4a7a1438a2afc9fb6b")
);
executeTest("testVariantTypeSelection--" + testFile, spec);
}
@Test
public void testUsingDbsnpName() {
String testFile = testDir + "combine.3.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("a8a26c621018142c9cba1080cbe687a8")
);
executeTest("testUsingDbsnpName--" + testFile, spec);
}
@Test
public void testRegenotype() {
String testFile = testDir + "combine.3.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("6bee6dc2316aa539560a6d9d8adbc4ff")
);
executeTest("testRegenotype--" + testFile, spec);
}
// @Test
// public void testComplexSelection() {
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
// 1,
// Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
// );
// spec.disableShadowBCF();
// executeTest("testComplexSelection--" + testfile, spec);
// }
//
// @Test
// public void testSampleExclusion() {
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
// 1,
// Arrays.asList("bbd7b28d1c5701e17b395d64f8b6f13d")
// );
// spec.disableShadowBCF();
//
// executeTest("testSampleExclusion--" + testfile, spec);
// }
//
// @Test
// public void testRepeatedLineSelection() {
// String testfile = testDir + "test.dup.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// baseTestString(" -sn A -sn B -sn C --variant " + testfile),
// 1,
// Arrays.asList("77579c53dbde4e8171f3cee83b98351b")
// );
//
// executeTest("testRepeatedLineSelection--" + testfile, spec);
// }
//
// @Test
// public void testDiscordance() {
// String testFile = testDir + "NA12878.hg19.example1.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
// 1,
// Arrays.asList("03abdc27bfd7aa36d57bba0325b31e0d")
// );
// spec.disableShadowBCF();
//
// executeTest("testDiscordance--" + testFile, spec);
// }
//
// @Test
// public void testDiscordanceNoSampleSpecified() {
// String testFile = testDir + "NA12878.hg19.example1.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
// 1,
// Arrays.asList("9fb54ed003234a5847c565ffb6767b95")
// );
// spec.disableShadowBCF();
//
// executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec);
// }
//
// @Test
// public void testConcordance() {
// String testFile = testDir + "NA12878.hg19.example1.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s --no_cmdline_in_header",
// 1,
// Arrays.asList("76857b016198c3e08a2e27bbdb49f3f0")
// );
// spec.disableShadowBCF();
//
// executeTest("testConcordance--" + testFile, spec);
// }
//
// @Test
// public void testVariantTypeSelection() {
// String testFile = testDir + "complexExample1.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
// 1,
// Arrays.asList("6c0b0c5f03d26f4a7a1438a2afc9fb6b")
// );
//
// executeTest("testVariantTypeSelection--" + testFile, spec);
// }
//
// @Test
// public void testUsingDbsnpName() {
// String testFile = testDir + "combine.3.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
// 1,
// Arrays.asList("a8a26c621018142c9cba1080cbe687a8")
// );
//
// executeTest("testUsingDbsnpName--" + testFile, spec);
// }
//
// @Test
// public void testRegenotype() {
// String testFile = testDir + "combine.3.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
// 1,
// Arrays.asList("6bee6dc2316aa539560a6d9d8adbc4ff")
// );
//
// executeTest("testRegenotype--" + testFile, spec);
// }
@Test
public void testMultipleRecordsAtOnePosition() {
@ -146,58 +146,58 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
}
@Test
public void testNoGTs() {
String testFile = testDir + "vcf4.1.example.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("95c4d43b11c3d0dd3ab19941c474269b")
);
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
}
@Test
public void testParallelization2() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
);
spec.disableShadowBCF();
executeTest("testParallelization (2 threads)--" + testfile, spec);
}
@Test
public void testParallelization4() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
);
spec.disableShadowBCF();
executeTest("testParallelization (4 threads)--" + testfile, spec);
}
@Test
public void testSelectFromMultiAllelic() {
String testfile = testDir + "multi-allelic.bi-allelicInGIH.vcf";
String samplesFile = testDir + "GIH.samples.list";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
1,
Arrays.asList("fa92b3b41f1c04f685be8de32afc9706")
);
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
}
// @Test
// public void testNoGTs() {
// String testFile = testDir + "vcf4.1.example.vcf";
//
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
// 1,
// Arrays.asList("95c4d43b11c3d0dd3ab19941c474269b")
// );
//
// executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
// }
//
// @Test
// public void testParallelization2() {
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
// WalkerTestSpec spec;
//
// spec = new WalkerTestSpec(
// baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
// 1,
// Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
// );
// spec.disableShadowBCF();
// executeTest("testParallelization (2 threads)--" + testfile, spec);
// }
//
// @Test
// public void testParallelization4() {
// String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
// String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
// WalkerTestSpec spec;
// spec = new WalkerTestSpec(
// baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
// 1,
// Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
// );
// spec.disableShadowBCF();
//
// executeTest("testParallelization (4 threads)--" + testfile, spec);
// }
//
// @Test
// public void testSelectFromMultiAllelic() {
// String testfile = testDir + "multi-allelic.bi-allelicInGIH.vcf";
// String samplesFile = testDir + "GIH.samples.list";
// WalkerTestSpec spec = new WalkerTestSpec(
// "-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
// 1,
// Arrays.asList("fa92b3b41f1c04f685be8de32afc9706")
// );
// executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
// }
}

View File

@ -120,12 +120,8 @@ public class VCFWriterUnitTest extends BaseTest {
attributes.put("DP","50");
for (String name : header.getGenotypeSamples()) {
Map<String, Object> gtattributes = new HashMap<String,Object>();
gtattributes.put("BB","1");
Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true);
Genotype gt = new GenotypeBuilder(name,alleles.subList(1,2)).GQ(0).attribute("BB", "1").phased(true).make();
genotypes.add(gt);
}
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();

View File

@ -76,17 +76,17 @@ public class GenotypeLikelihoodsUnitTest {
public void testGetAsMap(){
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(v);
//Log scale
EnumMap<Genotype.Type,Double> glMap = gl.getAsMap(false);
Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
EnumMap<GenotypeType,Double> glMap = gl.getAsMap(false);
Assert.assertEquals(v[GenotypeType.HOM_REF.ordinal()-1],glMap.get(GenotypeType.HOM_REF));
Assert.assertEquals(v[GenotypeType.HET.ordinal()-1],glMap.get(GenotypeType.HET));
Assert.assertEquals(v[GenotypeType.HOM_VAR.ordinal()-1],glMap.get(GenotypeType.HOM_VAR));
//Linear scale
glMap = gl.getAsMap(true);
double [] vl = MathUtils.normalizeFromLog10(v);
Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
Assert.assertEquals(vl[GenotypeType.HOM_REF.ordinal()-1],glMap.get(GenotypeType.HOM_REF));
Assert.assertEquals(vl[GenotypeType.HET.ordinal()-1],glMap.get(GenotypeType.HET));
Assert.assertEquals(vl[GenotypeType.HOM_VAR.ordinal()-1],glMap.get(GenotypeType.HOM_VAR));
//Test missing likelihoods
gl = GenotypeLikelihoods.fromPLField(".");
@ -111,19 +111,19 @@ public class GenotypeLikelihoodsUnitTest {
GenotypeLikelihoods gl = GenotypeLikelihoods.fromPLField(vPLString);
//GQ for the best guess genotype
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HET),-3.9);
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HET),-3.9);
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
//GQ for the other genotypes
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF), Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR), Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_REF), Math.log10(1.0 - test[GenotypeType.HOM_REF.ordinal()-1]));
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_VAR), Math.log10(1.0 - test[GenotypeType.HOM_VAR.ordinal()-1]));
//Test missing likelihoods
gl = GenotypeLikelihoods.fromPLField(".");
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_REF),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HET),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_VAR),Double.NEGATIVE_INFINITY);
}
@ -134,7 +134,7 @@ public class GenotypeLikelihoodsUnitTest {
double[] expectedQuals = new double[]{-0.04100161, -1, -0.003930294};
for ( int i = 0; i < likelihoods.length; i++ ) {
Assert.assertEquals(GenotypeLikelihoods.getQualFromLikelihoods(i, likelihoods), expectedQuals[i], 1e-6,
Assert.assertEquals(GenotypeLikelihoods.getGQLog10FromLikelihoods(i, likelihoods), expectedQuals[i], 1e-6,
"GQ value for genotype " + i + " was not calculated correctly");
}
}

View File

@ -51,13 +51,13 @@ public class GenotypesContextUnitTest extends BaseTest {
C = Allele.create("C");
Aref = Allele.create("A", true);
T = Allele.create("T");
AA = new Genotype("AA", Arrays.asList(Aref, Aref));
AT = new Genotype("AT", Arrays.asList(Aref, T));
TT = new Genotype("TT", Arrays.asList(T, T));
AC = new Genotype("AC", Arrays.asList(Aref, C));
CT = new Genotype("CT", Arrays.asList(C, T));
CC = new Genotype("CC", Arrays.asList(C, C));
MISSING = new Genotype("MISSING", Arrays.asList(C, C));
AA = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
AT = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
TT = GenotypeBuilder.create("TT", Arrays.asList(T, T));
AC = GenotypeBuilder.create("AC", Arrays.asList(Aref, C));
CT = GenotypeBuilder.create("CT", Arrays.asList(C, T));
CC = GenotypeBuilder.create("CC", Arrays.asList(C, C));
MISSING = GenotypeBuilder.create("MISSING", Arrays.asList(C, C));
allGenotypes = Arrays.asList(AA, AT, TT, AC, CT, CC);
}
@ -223,8 +223,8 @@ public class GenotypesContextUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "GenotypesContextProvider")
public void testAdds(GenotypesContextProvider cfg) {
Genotype add1 = new Genotype("add1", Arrays.asList(Aref, Aref));
Genotype add2 = new Genotype("add2", Arrays.asList(Aref, Aref));
Genotype add1 = GenotypeBuilder.create("add1", Arrays.asList(Aref, Aref));
Genotype add2 = GenotypeBuilder.create("add2", Arrays.asList(Aref, Aref));
GenotypesContext gc = cfg.makeContext();
gc.add(add1);
@ -279,7 +279,7 @@ public class GenotypesContextUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "GenotypesContextProvider")
public void testSet(GenotypesContextProvider cfg) {
Genotype set = new Genotype("replace", Arrays.asList(Aref, Aref));
Genotype set = GenotypeBuilder.create("replace", Arrays.asList(Aref, Aref));
int n = cfg.makeContext().size();
for ( int i = 0; i < n; i++ ) {
GenotypesContext gc = cfg.makeContext();
@ -297,7 +297,7 @@ public class GenotypesContextUnitTest extends BaseTest {
for ( int i = 0; i < n; i++ ) {
GenotypesContext gc = cfg.makeContext();
Genotype toReplace = gc.get(i);
Genotype replacement = new Genotype(toReplace.getSampleName(), Arrays.asList(Aref, Aref));
Genotype replacement = GenotypeBuilder.create(toReplace.getSampleName(), Arrays.asList(Aref, Aref));
gc.replace(replacement);
ArrayList<Genotype> l = new ArrayList<Genotype>(cfg.initialSamples);
l.set(i, replacement);

View File

@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
public void run(final VariantContext vc) {
if ( samples == null )
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
VariantContext sub = vc.subContextFromSamples(samples);
VariantContext sub = vc.subContextFromSamples(samples, true);
sub.getNSamples();
}
};
@ -230,7 +230,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
for ( int i = 0; i < dupsToMerge; i++ ) {
GenotypesContext gc = GenotypesContext.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
gc.add(new Genotype(g.getSampleName()+"_"+i, g));
gc.add(new GenotypeBuilder(g).name(g.getSampleName()+"_"+i).make());
}
toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make());
}

View File

@ -273,7 +273,7 @@ public class VariantContextTestProvider {
for ( int i = 0; i < genotypes.length - 1; i++ )
gc.add(genotypes[i]);
for ( int i = 0; i < nCopiesOfLast; i++ )
gc.add(new Genotype("copy" + i, last));
gc.add(new GenotypeBuilder(last).name("copy" + i).make());
add(builder.genotypes(gc));
}
}
@ -286,12 +286,12 @@ public class VariantContextTestProvider {
// test ref/ref
final Allele ref = site.getReference();
final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null;
final Genotype homRef = new Genotype("homRef", Arrays.asList(ref, ref));
final Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(ref, ref));
addGenotypeTests(site, homRef);
if ( alt1 != null ) {
final Genotype het = new Genotype("het", Arrays.asList(ref, alt1));
final Genotype homVar = new Genotype("homVar", Arrays.asList(alt1, alt1));
final Genotype het = GenotypeBuilder.create("het", Arrays.asList(ref, alt1));
final Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(alt1, alt1));
addGenotypeTests(site, homRef, het);
addGenotypeTests(site, homRef, het, homVar);
final List<Allele> noCall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
@ -299,17 +299,17 @@ public class VariantContextTestProvider {
// ploidy
if ( ENABLE_PLOIDY_TESTS ) {
addGenotypeTests(site,
new Genotype("dip", Arrays.asList(ref, alt1)),
new Genotype("hap", Arrays.asList(ref)));
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
GenotypeBuilder.create("hap", Arrays.asList(ref)));
addGenotypeTests(site,
new Genotype("dip", Arrays.asList(ref, alt1)),
new Genotype("tet", Arrays.asList(ref, alt1, alt1)));
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
addGenotypeTests(site,
new Genotype("nocall", noCall),
new Genotype("dip", Arrays.asList(ref, alt1)),
new Genotype("tet", Arrays.asList(ref, alt1, alt1)));
GenotypeBuilder.create("nocall", noCall),
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
}
}
@ -317,26 +317,26 @@ public class VariantContextTestProvider {
if ( site.getNAlleles() == 2 ) {
// testing PLs
addGenotypeTests(site,
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2}),
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3}));
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{0, -1, -2}),
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2, -3}));
addGenotypeTests(site,
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3}));
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{-1, 0, -2}),
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2, -3}));
addGenotypeTests(site,
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2000, -1000}));
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{-1, 0, -2}),
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2000, -1000}));
addGenotypeTests(site, // missing PLs
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
new Genotype("g2", Arrays.asList(ref, ref), -1));
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{-1, 0, -2}),
GenotypeBuilder.create("g2", Arrays.asList(ref, ref)));
}
else if ( site.getNAlleles() == 3 ) {
// testing PLs
addGenotypeTests(site,
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2, -3, -4, -5}),
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3, -4, -5, -6}));
GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[]{0, -1, -2, -3, -4, -5}),
GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[]{0, -2, -3, -4, -5, -6}));
}
}
@ -383,11 +383,10 @@ public class VariantContextTestProvider {
private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) {
if ( value.length == 0 )
return new Genotype(name, Arrays.asList(ref, ref), -1);
return GenotypeBuilder.create(name, Arrays.asList(ref, ref));
else {
final Object toAdd = value.length == 1 ? value[0] : Arrays.asList(value);
Map<String, Object> attr = Collections.singletonMap(key, toAdd);
return new Genotype(name, Arrays.asList(ref, ref), -1, null, attr, false);
return new GenotypeBuilder(name, Arrays.asList(ref, ref)).attribute(key, toAdd).make();
}
}
@ -488,7 +487,7 @@ public class VariantContextTestProvider {
Assert.assertEquals(actual.getGenotypeString(), expected.getGenotypeString());
Assert.assertEquals(actual.getFilters(), expected.getFilters());
Assert.assertEquals(actual.getPhredScaledQual(), expected.getPhredScaledQual());
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
assertAttributesEquals(actual.getExtendedAttributes(), expected.getExtendedAttributes());
Assert.assertEquals(actual.isPhased(), expected.isPhased());
Assert.assertEquals(actual.getPloidy(), expected.getPloidy());
}

View File

@ -276,7 +276,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testCreatingPartiallyCalledGenotype() {
List<Allele> alleles = Arrays.asList(Aref, C);
Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL));
Genotype g = GenotypeBuilder.create("foo", Arrays.asList(C, Allele.NO_CALL));
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g).make();
Assert.assertTrue(vc.isSNP());
@ -293,7 +293,7 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertFalse(vc.getGenotype("foo").isNoCall());
Assert.assertFalse(vc.getGenotype("foo").isHom());
Assert.assertTrue(vc.getGenotype("foo").isMixed());
Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED);
Assert.assertEquals(vc.getGenotype("foo").getType(), GenotypeType.MIXED);
}
@Test (expectedExceptions = Exception.class)
@ -351,9 +351,9 @@ public class VariantContextUnitTest extends BaseTest {
public void testAccessingSimpleSNPGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T);
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
.genotypes(g1, g2, g3).make();
@ -388,12 +388,12 @@ public class VariantContextUnitTest extends BaseTest {
public void testAccessingCompleteGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g4 = new Genotype("Td", Arrays.asList(T, del));
Genotype g5 = new Genotype("dd", Arrays.asList(del, del));
Genotype g6 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del));
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del));
Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
.genotypes(g1, g2, g3, g4, g5, g6).make();
@ -418,9 +418,9 @@ public class VariantContextUnitTest extends BaseTest {
List<Allele> alleles2 = Arrays.asList(Aref);
List<Allele> alleles3 = Arrays.asList(Aref, T, del);
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
Genotype g1 = new Genotype("AA1", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AA2", Arrays.asList(Aref, Aref));
Genotype g3 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
Genotype g3 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
.genotypes(g1, g2, g3).make();
@ -439,8 +439,8 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testFilters() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1, g2).make();
@ -543,11 +543,11 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testGetGenotypeCounts() {
List<Allele> alleles = Arrays.asList(Aref, T);
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g4 = new Genotype("A.", Arrays.asList(Aref, Allele.NO_CALL));
Genotype g5 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
Genotype g4 = GenotypeBuilder.create("A.", Arrays.asList(Aref, Allele.NO_CALL));
Genotype g5 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
// we need to create a new VariantContext each time
VariantContext vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
@ -565,19 +565,19 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testVCFfromGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g4 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
Genotype g5 = new Genotype("--", Arrays.asList(del, del));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del));
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())));
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())));
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())));
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())));
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())));
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())));
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())), true);
Assert.assertTrue(vc12.isPolymorphicInSamples());
Assert.assertTrue(vc23.isPolymorphicInSamples());
@ -620,9 +620,9 @@ public class VariantContextUnitTest extends BaseTest {
}
public void testGetGenotypeMethods() {
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
GenotypesContext gc = GenotypesContext.create(g1, g2, g3);
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();
@ -725,9 +725,9 @@ public class VariantContextUnitTest extends BaseTest {
@DataProvider(name = "SitesAndGenotypesVC")
public Object[][] MakeSitesAndGenotypesVCs() {
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
VariantContext sites = new VariantContextBuilder("sites", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).make();
VariantContext genotypes = new VariantContextBuilder(sites).source("genotypes").genotypes(g1, g2, g3).make();
@ -764,9 +764,9 @@ public class VariantContextUnitTest extends BaseTest {
modified = new VariantContextBuilder(modified).attributes(null).make();
Assert.assertTrue(modified.getAttributes().isEmpty());
Genotype g1 = new Genotype("AA2", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT2", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT2", Arrays.asList(T, T));
Genotype g1 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT2", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT2", Arrays.asList(T, T));
GenotypesContext gc = GenotypesContext.create(g1,g2,g3);
modified = new VariantContextBuilder(cfg.vc).genotypes(gc).make();
Assert.assertEquals(modified.getGenotypes(), gc);
@ -824,13 +824,13 @@ public class VariantContextUnitTest extends BaseTest {
@Test(dataProvider = "SubContextTest")
public void runSubContextTest(SubContextTest cfg) {
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
GenotypesContext gc = GenotypesContext.create(g1, g2, g3);
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();
VariantContext sub = cfg.updateAlleles ? vc.subContextFromSamples(cfg.samples) : vc.subContextFromSamples(cfg.samples, vc.getAlleles());
VariantContext sub = vc.subContextFromSamples(cfg.samples, cfg.updateAlleles);
// unchanged attributes should be the same
Assert.assertEquals(sub.getChr(), vc.getChr());
@ -911,7 +911,7 @@ public class VariantContextUnitTest extends BaseTest {
public void runSampleNamesTest(SampleNamesTest cfg) {
GenotypesContext gc = GenotypesContext.create(cfg.sampleNames.size());
for ( final String name : cfg.sampleNames ) {
gc.add(new Genotype(name, Arrays.asList(Aref, T)));
gc.add(GenotypeBuilder.create(name, Arrays.asList(Aref, T)));
}
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();
@ -926,11 +926,11 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testGenotypeCounting() {
Genotype noCall = new Genotype("nocall", Arrays.asList(Allele.NO_CALL));
Genotype mixed = new Genotype("mixed", Arrays.asList(Aref, Allele.NO_CALL));
Genotype homRef = new Genotype("homRef", Arrays.asList(Aref, Aref));
Genotype het = new Genotype("het", Arrays.asList(Aref, T));
Genotype homVar = new Genotype("homVar", Arrays.asList(T, T));
Genotype noCall = GenotypeBuilder.create("nocall", Arrays.asList(Allele.NO_CALL));
Genotype mixed = GenotypeBuilder.create("mixed", Arrays.asList(Aref, Allele.NO_CALL));
Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(Aref, Aref));
Genotype het = GenotypeBuilder.create("het", Arrays.asList(Aref, T));
Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(T, T));
List<Genotype> allGenotypes = Arrays.asList(noCall, mixed, homRef, het, homVar);
final int nCycles = allGenotypes.size() * 10;
@ -943,7 +943,7 @@ public class VariantContextUnitTest extends BaseTest {
nSamples++;
Genotype g = allGenotypes.get(j % allGenotypes.size());
final String name = String.format("%s_%d%d", g.getSampleName(), i, j);
gc.add(new Genotype(name, g.getAlleles()));
gc.add(GenotypeBuilder.create(name, g.getAlleles()));
switch ( g.getType() ) {
case NO_CALL: nNoCall++; nNoCallAlleles++; break;
case HOM_REF: nA += 2; nHomRef++; break;

View File

@ -64,16 +64,16 @@ public class VariantContextUtilsUnitTest extends BaseTest {
}
private Genotype makeG(String sample, Allele a1, Allele a2) {
return new Genotype(sample, Arrays.asList(a1, a2));
return GenotypeBuilder.create(sample, Arrays.asList(a1, a2));
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double... pls) {
return new Genotype(sample, Arrays.asList(a1, a2), log10pError, pls);
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).PL(pls).make();
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
return new Genotype(sample, Arrays.asList(a1, a2), log10pError);
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).make();
}
private VariantContext makeVC(String source, List<Allele> alleles) {
@ -523,7 +523,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
for (Genotype value : actual) {
Genotype expectedValue = expected.get(value.getSampleName());
Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal");
Assert.assertEquals(value.getAlleles(), expectedValue.getAlleles(), "Alleles in Genotype aren't equal");
Assert.assertEquals(value.getLog10PError(), expectedValue.getLog10PError(), "GQ values aren't equal");
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not");
if ( value.hasLikelihoods() )