GenotypeMap -> GenotypeCollection part 2

-- Code actually builds
This commit is contained in:
Mark DePristo 2011-11-14 17:42:55 -05:00
parent 2e9d5363e7
commit f0234ab67f
37 changed files with 282 additions and 581 deletions

View File

@ -202,15 +202,6 @@ public class VariantContextAdaptors {
} }
} }
public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) {
HashSet<String> names = new LinkedHashSet<String>();
for ( Genotype g : vc.getGenotypesSortedByName() ) {
names.add(g.getSampleName());
}
return new VCFHeader(hInfo == null ? new HashSet<VCFHeaderLine>() : hInfo, names);
}
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// GELI to VariantContext // GELI to VariantContext
@ -353,7 +344,7 @@ public class VariantContextAdaptors {
} }
Genotype g = new Genotype(samples[i], myAlleles); Genotype g = new Genotype(samples[i], myAlleles);
genotypes.put(samples[i], g); genotypes.add(g);
} }
HashMap<String, Object> attrs = new HashMap<String, Object>(1); HashMap<String, Object> attrs = new HashMap<String, Object>(1);

View File

@ -89,9 +89,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage(); final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
if (haplotypes != null) { if (haplotypes != null) {
final Set<Map.Entry<String, Genotype>> genotypes = vc.getGenotypes().entrySet(); for ( final Genotype genotype : vc.getGenotypes()) {
for ( final Map.Entry<String, Genotype> genotype : genotypes ) { final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getKey());
if ( thisContext != null ) { if ( thisContext != null ) {
final ReadBackedPileup thisPileup; final ReadBackedPileup thisPileup;
if (thisContext.hasExtendedEventPileup()) if (thisContext.hasExtendedEventPileup())

View File

@ -52,8 +52,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
double hetCount = 0.0; double hetCount = 0.0;
double homCount = 0.0; double homCount = 0.0;
int N = 0; // number of samples that have likelihoods int N = 0; // number of samples that have likelihoods
for ( final Map.Entry<String, Genotype> genotypeMap : genotypes.entrySet() ) { for ( final Genotype g : genotypes ) {
Genotype g = genotypeMap.getValue();
if ( g.isNoCall() || !g.hasLikelihoods() ) if ( g.isNoCall() || !g.hasLikelihoods() )
continue; continue;

View File

@ -35,13 +35,13 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
int depth = 0; int depth = 0;
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) { for ( final Genotype genotype : genotypes ) {
// we care only about variant calls with likelihoods // we care only about variant calls with likelihoods
if ( genotype.getValue().isHomRef() ) if ( genotype.isHomRef() )
continue; continue;
AlignmentContext context = stratifiedContexts.get(genotype.getKey()); AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null ) if ( context == null )
continue; continue;

View File

@ -43,8 +43,8 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
if (vc.isSNP() && vc.isBiallelic()) { if (vc.isSNP() && vc.isBiallelic()) {
// todo - no current support for multiallelic snps // todo - no current support for multiallelic snps
for ( final Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) { for ( final Genotype genotype : genotypes ) {
final AlignmentContext context = stratifiedContexts.get(genotype.getKey()); final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null ) { if ( context == null ) {
continue; continue;
} }
@ -53,8 +53,8 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
} }
else if (vc.isIndel() || vc.isMixed()) { else if (vc.isIndel() || vc.isMixed()) {
for ( final Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) { for ( final Genotype genotype : genotypes ) {
final AlignmentContext context = stratifiedContexts.get(genotype.getKey()); final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null ) { if ( context == null ) {
continue; continue;
} }

View File

@ -229,11 +229,10 @@ public class VariantAnnotatorEngine {
return vc.getGenotypes(); return vc.getGenotypes();
GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples()); GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples());
for ( Map.Entry<String, Genotype> g : vc.getGenotypes().entrySet() ) { for ( final Genotype genotype : vc.getGenotypes() ) {
Genotype genotype = g.getValue(); AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
AlignmentContext context = stratifiedContexts.get(g.getKey());
if ( context == null ) { if ( context == null ) {
genotypes.put(g.getKey(), genotype); genotypes.add(genotype);
continue; continue;
} }
@ -243,7 +242,7 @@ public class VariantAnnotatorEngine {
if ( result != null ) if ( result != null )
genotypeAnnotations.putAll(result); genotypeAnnotations.putAll(result);
} }
genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased())); genotypes.add(new Genotype(genotype.getSampleName(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
} }
return genotypes; return genotypes;

View File

@ -202,9 +202,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
hapmapGenotypes = vc_comp.getGenotypes(); hapmapGenotypes = vc_comp.getGenotypes();
} }
for ( Map.Entry<String, Genotype> originalGenotypes : vc_input.getGenotypes().entrySet() ) { for ( final Genotype g : vc_input.getGenotypes() ) {
Genotype g = originalGenotypes.getValue();
Set<String> filters = new LinkedHashSet<String>(g.getFilters()); Set<String> filters = new LinkedHashSet<String>(g.getFilters());
boolean genotypeIsPhased = true; boolean genotypeIsPhased = true;
@ -214,7 +212,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
// use sample as key into genotypes structure // use sample as key into genotypes structure
if (vc_comp != null) { if (vc_comp != null) {
if (vc_input.getGenotypes().containsKey(sample) && hapmapGenotypes.containsKey(sample)) { if (vc_input.getGenotypes().containsSample(sample) && hapmapGenotypes.containsSample(sample)) {
Genotype hapmapGenotype = hapmapGenotypes.get(sample); Genotype hapmapGenotype = hapmapGenotypes.get(sample);
if (hapmapGenotype.isCalled()){ if (hapmapGenotype.isCalled()){
@ -325,13 +323,12 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
else { else {
originalAttributes.put("OG","."); originalAttributes.put("OG",".");
} }
Genotype imputedGenotype = new Genotype(originalGenotypes.getKey(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased); Genotype imputedGenotype = new Genotype(g.getSampleName(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased);
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) { if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
beagleVarCounts++; beagleVarCounts++;
} }
genotypes.put(originalGenotypes.getKey(), imputedGenotype); genotypes.add(imputedGenotype);
} }
VariantContext filteredVC; VariantContext filteredVC;

View File

@ -39,10 +39,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.io.File; import java.io.File;
import java.io.PrintStream; import java.io.PrintStream;
@ -245,18 +242,18 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
} }
if ( markers != null ) markers.append("\n"); if ( markers != null ) markers.append("\n");
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes(); GenotypeCollection preferredGenotypes = preferredVC.getGenotypes();
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; GenotypeCollection otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
for ( String sample : samples ) { for ( String sample : samples ) {
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE; boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
Genotype genotype; Genotype genotype;
boolean isValidation; boolean isValidation;
// use sample as key into genotypes structure // use sample as key into genotypes structure
if ( preferredGenotypes.keySet().contains(sample) ) { if ( preferredGenotypes.containsSample(sample) ) {
genotype = preferredGenotypes.get(sample); genotype = preferredGenotypes.get(sample);
isValidation = isValidationSite; isValidation = isValidationSite;
} else if ( otherGenotypes != null && otherGenotypes.keySet().contains(sample) ) { } else if ( otherGenotypes != null && otherGenotypes.containsSample(sample) ) {
genotype = otherGenotypes.get(sample); genotype = otherGenotypes.get(sample);
isValidation = ! isValidationSite; isValidation = ! isValidationSite;
} else { } else {

View File

@ -102,7 +102,7 @@ public class VCFDiffableReader implements DiffableReader {
vcRoot.add(attribute.getKey(), attribute.getValue()); vcRoot.add(attribute.getKey(), attribute.getValue());
} }
for (Genotype g : vc.getGenotypes().values() ) { for (Genotype g : vc.getGenotypes() ) {
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot); DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
gRoot.add("GT", g.getGenotypeString()); gRoot.add("GT", g.getGenotypeString());
gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 ); gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 );

View File

@ -290,10 +290,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
genotypes = GenotypeCollection.create(vc.getGenotypes().size()); genotypes = GenotypeCollection.create(vc.getGenotypes().size());
// for each genotype, check filters then create a new object // for each genotype, check filters then create a new object
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) { for ( final Genotype g : vc.getGenotypes() ) {
Genotype g = genotype.getValue();
if ( g.isCalled() ) { if ( g.isCalled() ) {
Set<String> filters = new LinkedHashSet<String>(g.getFilters()); Set<String> filters = new LinkedHashSet<String>(g.getFilters());
@ -301,9 +298,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
if ( VariantContextUtils.match(vc, g, exp) ) if ( VariantContextUtils.match(vc, g, exp) )
filters.add(exp.name); filters.add(exp.name);
} }
genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased())); genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased()));
} else { } else {
genotypes.put(genotype.getKey(), g); genotypes.add(g);
} }
} }
} }

View File

@ -45,8 +45,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model { public enum Model {
/** The default model with the best performance in all cases */ /** The default model with the best performance in all cases */
EXACT, EXACT,
/** For posterity we have kept around the older GRID_SEARCH model, but this gives inferior results and shouldn't be used. */
GRID_SEARCH
} }
protected int N; protected int N;
@ -71,7 +69,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
* @param log10AlleleFrequencyPriors priors * @param log10AlleleFrequencyPriors priors
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
*/ */
protected abstract void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> Alleles, protected abstract void getLog10PNonRef(GenotypeCollection GLs, List<Allele> Alleles,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors); double[] log10AlleleFrequencyPosteriors);

View File

@ -50,7 +50,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
super(UAC, N, logger, verboseWriter); super(UAC, N, logger, verboseWriter);
} }
public void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> alleles, public void getLog10PNonRef(GenotypeCollection GLs, List<Allele> alleles,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) { double[] log10AlleleFrequencyPosteriors) {
final int numAlleles = alleles.size(); final int numAlleles = alleles.size();
@ -94,11 +94,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
} }
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) { private static final ArrayList<double[]> getGLs(GenotypeCollection GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.values() ) { for ( Genotype sample : GLs ) {
if ( sample.hasLikelihoods() ) { if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector(); double[] gls = sample.getLikelihoods().getAsVector();
@ -154,7 +154,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
} }
public int linearExact(Map<String, Genotype> GLs, public int linearExact(GenotypeCollection GLs,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs); final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
@ -290,16 +290,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// todo = can't deal with optimal dynamic programming solution with multiallelic records // todo = can't deal with optimal dynamic programming solution with multiallelic records
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) { if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
sampleIndices.addAll(GLs.keySet()); sampleIndices.addAll(GLs.getSampleNames());
sampleIdx = GLs.size(); sampleIdx = GLs.size();
} }
else { else {
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) { for ( final Genotype genotype : GLs ) {
if ( !sample.getValue().hasLikelihoods() ) if ( !genotype.hasLikelihoods() )
continue; continue;
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector(); double[] likelihoods = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) { if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
//System.out.print(sample.getKey()+":"); //System.out.print(sample.getKey()+":");
@ -311,7 +311,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
continue; continue;
} }
sampleIndices.add(sample.getKey()); sampleIndices.add(genotype.getSampleName());
for (int k=0; k <= AFofMaxLikelihood; k++) { for (int k=0; k <= AFofMaxLikelihood; k++) {
@ -415,17 +415,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
qual = -1.0 * Math.log10(1.0 - chosenGenotype); qual = -1.0 * Math.log10(1.0 - chosenGenotype);
} }
//System.out.println(myAlleles.toString()); //System.out.println(myAlleles.toString());
calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
} }
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) { for ( final Genotype genotype : GLs ) {
if ( !genotype.hasLikelihoods() )
if ( !sample.getValue().hasLikelihoods() )
continue; continue;
Genotype g = GLs.get(sample.getKey()); Genotype g = GLs.get(genotype.getSampleName());
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector(); double[] likelihoods = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL) if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
continue; // regular likelihoods continue; // regular likelihoods
@ -436,7 +435,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
myAlleles.add(Allele.NO_CALL); myAlleles.add(Allele.NO_CALL);
myAlleles.add(Allele.NO_CALL); myAlleles.add(Allele.NO_CALL);
//System.out.println(myAlleles.toString()); //System.out.println(myAlleles.toString());
calls.put(sample.getKey(), new Genotype(sample.getKey(), myAlleles, qual, null, g.getAttributes(), false)); calls.add(new Genotype(genotype.getSampleName(), myAlleles, qual, null, g.getAttributes(), false));
} }
return calls; return calls;
} }

View File

@ -1,270 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeCollection;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.*;
public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
// for use in optimizing the P(D|AF) calculations:
// how much off from the max likelihoods do we need to be before we can quit calculating?
protected static final double LOG10_OPTIMIZATION_EPSILON = 8.0;
private AlleleFrequencyMatrix AFMatrix;
protected GridSearchAFEstimation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
AFMatrix = new AlleleFrequencyMatrix(N);
}
protected void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
initializeAFMatrix(GLs);
// first, calculate for AF=0 (no change to matrix)
log10AlleleFrequencyPosteriors[0] = AFMatrix.getLikelihoodsOfFrequency() + log10AlleleFrequencyPriors[0];
double maxLikelihoodSeen = log10AlleleFrequencyPosteriors[0];
int maxAlleleFrequencyToTest = AFMatrix.getSamples().size() * 2;
// for each minor allele frequency, calculate log10PofDgivenAFi
for (int i = 1; i <= maxAlleleFrequencyToTest; i++) {
// add one more alternate allele
AFMatrix.incrementFrequency();
// calculate new likelihoods
log10AlleleFrequencyPosteriors[i] = AFMatrix.getLikelihoodsOfFrequency() + log10AlleleFrequencyPriors[i];
// an optimization to speed up the calculation: if we are beyond the local maximum such
// that subsequent likelihoods won't factor into the confidence score, just quit
if ( maxLikelihoodSeen - log10AlleleFrequencyPosteriors[i] > LOG10_OPTIMIZATION_EPSILON )
return;
if ( log10AlleleFrequencyPosteriors[i] > maxLikelihoodSeen )
maxLikelihoodSeen = log10AlleleFrequencyPosteriors[i];
}
}
/**
* Overrides the super class
* @param vc variant context with genotype likelihoods
* @param log10AlleleFrequencyPosteriors allele frequency results
* @param AFofMaxLikelihood allele frequency of max likelihood
*
* @return calls
*/
protected GenotypeCollection assignGenotypes(VariantContext vc,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
if ( !vc.isVariant() )
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
Allele refAllele = vc.getReference();
Allele altAllele = vc.getAlternateAllele(0);
GenotypeCollection calls = GenotypeCollection.create();
// first, the potential alt calls
for ( String sample : AFMatrix.getSamples() ) {
Genotype g = vc.getGenotype(sample);
// set the genotype and confidence
Pair<Integer, Double> AFbasedGenotype = AFMatrix.getGenotype(AFofMaxLikelihood, sample);
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) {
myAlleles.add(refAllele);
myAlleles.add(refAllele);
} else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) {
myAlleles.add(refAllele);
myAlleles.add(altAllele);
} else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() )
myAlleles.add(altAllele);
myAlleles.add(altAllele);
}
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, g.getAttributes(), false));
}
return calls;
}
private void initializeAFMatrix(Map<String, Genotype> GLs) {
AFMatrix.clear();
for ( Genotype g : GLs.values() ) {
if ( g.hasLikelihoods() )
AFMatrix.setLikelihoods(g.getLikelihoods().getAsVector(), g.getSampleName());
}
}
protected static class AlleleFrequencyMatrix {
private double[][] matrix; // allele frequency matrix
private int[] indexes; // matrix to maintain which genotype is active
private int maxN; // total possible frequencies in data
private int frequency; // current frequency
// data structures necessary to maintain a list of the best genotypes and their scores
private ArrayList<String> samples = new ArrayList<String>();
private HashMap<Integer, HashMap<String, Pair<Integer, Double>>> samplesToGenotypesPerAF = new HashMap<Integer, HashMap<String, Pair<Integer, Double>>>();
public AlleleFrequencyMatrix(int N) {
maxN = N;
matrix = new double[N][3];
indexes = new int[N];
clear();
}
public List<String> getSamples() { return samples; }
public void clear() {
frequency = 0;
for (int i = 0; i < maxN; i++)
indexes[i] = 0;
samples.clear();
samplesToGenotypesPerAF.clear();
}
public void setLikelihoods(double[] GLs, String sample) {
int index = samples.size();
samples.add(sample);
matrix[index][GenotypeType.AA.ordinal()] = GLs[0];
matrix[index][GenotypeType.AB.ordinal()] = GLs[1];
matrix[index][GenotypeType.BB.ordinal()] = GLs[2];
}
public void incrementFrequency() {
int N = samples.size();
if ( frequency == 2 * N )
throw new ReviewedStingException("Frequency was incremented past N; how is this possible?");
frequency++;
double greedy = VALUE_NOT_CALCULATED;
int greedyIndex = -1;
for (int i = 0; i < N; i++) {
if ( indexes[i] == GenotypeType.AB.ordinal() ) {
if ( matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()] > greedy ) {
greedy = matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()];
greedyIndex = i;
}
}
else if ( indexes[i] == GenotypeType.AA.ordinal() ) {
if ( matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()] > greedy ) {
greedy = matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()];
greedyIndex = i;
}
// note that we currently don't bother with breaking ties between samples
// (which would be done by looking at the HOM_VAR value) because it's highly
// unlikely that a collision will both occur and that the difference will
// be significant at HOM_VAR...
}
// if this person is already hom var, he can't add another alternate allele
// so we can ignore that case
}
if ( greedyIndex == -1 )
throw new ReviewedStingException("There is no best choice for a new alternate allele; how is this possible?");
if ( indexes[greedyIndex] == GenotypeType.AB.ordinal() )
indexes[greedyIndex] = GenotypeType.BB.ordinal();
else
indexes[greedyIndex] = GenotypeType.AB.ordinal();
}
public double getLikelihoodsOfFrequency() {
double likelihoods = 0.0;
int N = samples.size();
for (int i = 0; i < N; i++)
likelihoods += matrix[i][indexes[i]];
/*
System.out.println(frequency);
for (int i = 0; i < N; i++) {
System.out.print(samples.get(i));
for (int j=0; j < 3; j++) {
System.out.print(String.valueOf(matrix[i][j]));
System.out.print(indexes[i] == j ? "* " : " ");
}
System.out.println();
}
System.out.println(likelihoods);
System.out.println();
*/
recordGenotypes();
return likelihoods;
}
public Pair<Integer, Double> getGenotype(int frequency, String sample) {
return samplesToGenotypesPerAF.get(frequency).get(sample);
}
private void recordGenotypes() {
HashMap<String, Pair<Integer, Double>> samplesToGenotypes = new HashMap<String, Pair<Integer, Double>>();
int index = 0;
for ( String sample : samples ) {
int genotype = indexes[index];
double score;
int maxEntry = MathUtils.maxElementIndex(matrix[index]);
// if the max value is for the most likely genotype, we can compute next vs. next best
if ( genotype == maxEntry ) {
if ( genotype == GenotypeType.AA.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AB.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
else if ( genotype == GenotypeType.AB.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
else // ( genotype == GenotypeType.HOM.ordinal() )
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.AB.ordinal()]);
}
// otherwise, we need to calculate the probability of the genotype
else {
double[] normalized = MathUtils.normalizeFromLog10(matrix[index]);
double chosenGenotype = normalized[genotype];
score = -1.0 * Math.log10(1.0 - chosenGenotype);
}
samplesToGenotypes.put(sample, new Pair<Integer, Double>(genotype, Math.abs(score)));
index++;
}
samplesToGenotypesPerAF.put(frequency, samplesToGenotypes);
}
}
}

View File

@ -133,7 +133,7 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
for ( VariantContext vc : VCs ) { for ( VariantContext vc : VCs ) {
if ( variantVC == null && vc.isVariant() ) if ( variantVC == null && vc.isVariant() )
variantVC = vc; variantVC = vc;
genotypes.putAll(getGenotypesWithGLs(vc.getGenotypes())); genotypes.addAll(getGenotypesWithGLs(vc.getGenotypes()));
} }
if ( variantVC == null ) { if ( variantVC == null ) {
@ -143,13 +143,12 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
return new VariantContext("VCwithGLs", variantVC.getChr(), variantVC.getStart(), variantVC.getEnd(), variantVC.getAlleles(), genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, null); return new VariantContext("VCwithGLs", variantVC.getChr(), variantVC.getStart(), variantVC.getEnd(), variantVC.getAlleles(), genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, null);
} }
private static Map<String, Genotype> getGenotypesWithGLs(Map<String, Genotype> genotypes) { private static GenotypeCollection getGenotypesWithGLs(GenotypeCollection genotypes) {
Map<String, Genotype> genotypesWithGLs = new HashMap<String, Genotype>(); GenotypeCollection genotypesWithGLs = GenotypeCollection.create(genotypes.size());
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) { for ( final Genotype g : genotypes ) {
if ( g.getValue().hasLikelihoods() && g.getValue().getLikelihoods().getAsVector() != null ) if ( g.hasLikelihoods() && g.getLikelihoods().getAsVector() != null )
genotypesWithGLs.put(g.getKey(), g.getValue()); genotypesWithGLs.add(g);
} }
return genotypesWithGLs; return genotypesWithGLs;
} }
} }

View File

@ -281,7 +281,7 @@ public class UnifiedGenotyperEngine {
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth()); attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.put(GL.getSample(), new Genotype(GL.getSample(), noCall, Genotype.NO_NEG_LOG_10PERROR, null, attributes, false)); genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_NEG_LOG_10PERROR, null, attributes, false));
} }
GenomeLoc loc = refContext.getLocus(); GenomeLoc loc = refContext.getLocus();
@ -811,9 +811,6 @@ public class UnifiedGenotyperEngine {
case EXACT: case EXACT:
afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter); afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter);
break; break;
case GRID_SEARCH:
afcm = new GridSearchAFEstimation(UAC, N, logger, verboseWriter);
break;
default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
} }

View File

@ -1059,15 +1059,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
} }
GenotypeCollection genotypes = GenotypeCollection.create(); GenotypeCollection genotypes = GenotypeCollection.create();
for ( String sample : normalSamples ) { for ( String sample : normalSamples ) {
Map<String,Object> attrs = call.makeStatsAttributes(null); Map<String,Object> attrs = call.makeStatsAttributes(null);
if ( call.isCall() ) // we made a call - put actual het genotype here: if ( call.isCall() ) // we made a call - put actual het genotype here:
genotypes.put(sample,new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false)); genotypes.add(new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all) else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
genotypes.put(sample,new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false)); genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
} }
Set<String> filters = null; Set<String> filters = null;
@ -1151,11 +1150,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
GenotypeCollection genotypes = GenotypeCollection.create(); GenotypeCollection genotypes = GenotypeCollection.create();
for ( String sample : normalSamples ) { for ( String sample : normalSamples ) {
genotypes.put(sample,new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false)); genotypes.add(new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false));
} }
for ( String sample : tumorSamples ) { for ( String sample : tumorSamples ) {
genotypes.put(sample,new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) ); genotypes.add(new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) );
} }
Set<String> filters = null; Set<String> filters = null;

View File

@ -122,7 +122,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
if (useSingleSample != null) { // only want to output context for one sample if (useSingleSample != null) { // only want to output context for one sample
Genotype sampGt = vc.getGenotype(useSingleSample); Genotype sampGt = vc.getGenotype(useSingleSample);
if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.]. Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP! if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.]. Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP!
vc = vc.subContextFromGenotypes(sampGt); vc = vc.subContextFromSample(sampGt.getSampleName());
else // asked for a sample that this vc does not contain, so ignore this vc: else // asked for a sample that this vc does not contain, so ignore this vc:
return; return;
} }

View File

@ -293,7 +293,7 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
if (tracker != null) { if (tracker != null) {
VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
GenotypeCollection genotypeCollection = vc.getGenotypes(); GenotypeCollection genotypeCollection = GenotypeCollection.create(vc.getGenotypes().size());
for (Trio trio : trios) { for (Trio trio : trios) {
Genotype mother = vc.getGenotype(trio.getMother()); Genotype mother = vc.getGenotype(trio.getMother());
@ -306,9 +306,7 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
Genotype phasedFather = trioGenotypes.get(1); Genotype phasedFather = trioGenotypes.get(1);
Genotype phasedChild = trioGenotypes.get(2); Genotype phasedChild = trioGenotypes.get(2);
genotypeCollection.put(phasedMother.getSampleName(), phasedMother); genotypeCollection.add(phasedMother, phasedFather, phasedChild);
genotypeCollection.put(phasedFather.getSampleName(), phasedFather);
genotypeCollection.put(phasedChild.getSampleName(), phasedChild);
} }
VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeCollection); VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeCollection);

View File

@ -122,7 +122,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
public int MIN_MAPPING_QUALITY_SCORE = 20; public int MIN_MAPPING_QUALITY_SCORE = 20;
@Argument(fullName = "sampleToPhase", shortName = "sampleToPhase", doc = "Only include these samples when phasing", required = false) @Argument(fullName = "sampleToPhase", shortName = "sampleToPhase", doc = "Only include these samples when phasing", required = false)
protected List<String> samplesToPhase = null; protected Set
<String> samplesToPhase = null;
private GenomeLoc mostDownstreamLocusReached = null; private GenomeLoc mostDownstreamLocusReached = null;
@ -272,10 +273,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList(PQ_KEY)); private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList(PQ_KEY));
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) { private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
// for ( String sample : samplesToPhase ) // for ( String sample : samplesToPhase )
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() )); // logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
VariantContext subvc = vc.subContextFromGenotypes(vc.getGenotypes(samplesToPhase).values()); VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
// logger.debug("original VC = " + vc); // logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc); // logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF); return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
@ -354,9 +355,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// Perform per-sample phasing: // Perform per-sample phasing:
GenotypeCollection sampGenotypes = vc.getGenotypes(); GenotypeCollection sampGenotypes = vc.getGenotypes();
Map<String, PhaseCounts> samplePhaseStats = new TreeMap<String, PhaseCounts>(); Map<String, PhaseCounts> samplePhaseStats = new TreeMap<String, PhaseCounts>();
for (Map.Entry<String, Genotype> sampGtEntry : sampGenotypes.entrySet()) { for (final Genotype gt : sampGenotypes) {
String samp = sampGtEntry.getKey(); String samp = gt.getSampleName();
Genotype gt = sampGtEntry.getValue();
if (DEBUG) logger.debug("sample = " + samp); if (DEBUG) logger.debug("sample = " + samp);
if (isUnfilteredCalledDiploidGenotype(gt)) { if (isUnfilteredCalledDiploidGenotype(gt)) {
@ -1134,7 +1134,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
this.start = vc.getStart(); this.start = vc.getStart();
this.stop = vc.getEnd(); this.stop = vc.getEnd();
this.alleles = vc.getAlleles(); this.alleles = vc.getAlleles();
this.genotypes = GenotypeCollection.create(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable this.genotypes = GenotypeCollection.copy(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
this.negLog10PError = vc.getNegLog10PError(); this.negLog10PError = vc.getNegLog10PError();
this.filters = vc.filtersWereApplied() ? vc.getFilters() : null; this.filters = vc.filtersWereApplied() ? vc.getFilters() : null;
this.attributes = new HashMap<String, Object>(vc.getAttributes()); this.attributes = new HashMap<String, Object>(vc.getAttributes());
@ -1153,7 +1153,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
public void setGenotype(String sample, Genotype newGt) { public void setGenotype(String sample, Genotype newGt) {
genotypes.put(sample, newGt); genotypes.add(newGt);
} }
public void setPhasingInconsistent() { public void setPhasingInconsistent() {

View File

@ -157,8 +157,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
// A C A // A C A
// A C C // A C C
for (Genotype g : vc1.getGenotypes().values()) { for (final Genotype g : vc1.getGenotypes()) {
String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null; final String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null;
switch (g.getType()) { switch (g.getType()) {
case NO_CALL: case NO_CALL:

View File

@ -118,7 +118,7 @@ public class G1KPhaseITable extends VariantEvaluator {
} }
// count variants per sample // count variants per sample
for (final Genotype g : eval.getGenotypes().values()) { for (final Genotype g : eval.getGenotypes()) {
if ( ! g.isNoCall() && ! g.isHomRef() ) { if ( ! g.isNoCall() && ! g.isHomRef() ) {
int count = countsPerSample.get(g.getSampleName()).get(eval.getType()); int count = countsPerSample.get(g.getSampleName()).get(eval.getType());
countsPerSample.get(g.getSampleName()).put(eval.getType(), count + 1); countsPerSample.get(g.getSampleName()).put(eval.getType(), count + 1);

View File

@ -277,8 +277,9 @@ public class GenotypeConcordance extends VariantEvaluator {
// determine concordance for eval data // determine concordance for eval data
if (eval != null) { if (eval != null) {
for (final String sample : eval.getGenotypes().keySet()) { for (final Genotype g : eval.getGenotypes() ) {
final Genotype.Type called = eval.getGenotype(sample).getType(); final String sample = g.getSampleName();
final Genotype.Type called = g.getType();
final Genotype.Type truth; final Genotype.Type truth;
if (!validationIsValidVC || !validation.hasGenotype(sample)) { if (!validationIsValidVC || !validation.hasGenotype(sample)) {
@ -299,9 +300,9 @@ public class GenotypeConcordance extends VariantEvaluator {
else { else {
final Genotype.Type called = Genotype.Type.NO_CALL; final Genotype.Type called = Genotype.Type.NO_CALL;
for (final String sample : validation.getGenotypes().keySet()) { for (final Genotype g : validation.getGenotypes()) {
final Genotype.Type truth = validation.getGenotype(sample).getType(); final Genotype.Type truth = g.getType();
detailedStats.incrValue(sample, truth, called); detailedStats.incrValue(g.getSampleName(), truth, called);
// print out interesting sites // print out interesting sites
/* /*
@ -410,8 +411,8 @@ class SampleStats implements TableType {
public SampleStats(VariantContext vc, int nGenotypeTypes) { public SampleStats(VariantContext vc, int nGenotypeTypes) {
this.nGenotypeTypes = nGenotypeTypes; this.nGenotypeTypes = nGenotypeTypes;
for (String sample : vc.getGenotypes().keySet()) for (final Genotype g : vc.getGenotypes())
concordanceStats.put(sample, new long[nGenotypeTypes][nGenotypeTypes]); concordanceStats.put(g.getSampleName(), new long[nGenotypeTypes][nGenotypeTypes]);
} }
public SampleStats(int genotypeTypes) { public SampleStats(int genotypeTypes) {
@ -511,8 +512,8 @@ class SampleSummaryStats implements TableType {
public SampleSummaryStats(final VariantContext vc) { public SampleSummaryStats(final VariantContext vc) {
concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]); concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
for( final String sample : vc.getGenotypes().keySet() ) { for( final Genotype g : vc.getGenotypes() ) {
concordanceSummary.put(sample, new double[COLUMN_KEYS.length]); concordanceSummary.put(g.getSampleName(), new double[COLUMN_KEYS.length]);
} }
} }

View File

@ -48,7 +48,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
float numGenosHere = 0; float numGenosHere = 0;
int numIndsHere = 0; int numIndsHere = 0;
for (Genotype genotype : vc.getGenotypes().values()) { for (final Genotype genotype : vc.getGenotypes()) {
numIndsHere++; numIndsHere++;
if (!genotype.isNoCall()) { if (!genotype.isNoCall()) {
//increment stats for heterozygosity //increment stats for heterozygosity

View File

@ -266,7 +266,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested sample * @return a new VariantContext with just the requested sample
*/ */
public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
return getSubsetOfVariantContext(vc, Arrays.asList(sampleName)); return getSubsetOfVariantContext(vc, new HashSet<String>(Arrays.asList(sampleName)));
} }
/** /**
@ -276,7 +276,7 @@ public class VariantEvalUtils {
* @param sampleNames the samples to pull out of the VariantContext * @param sampleNames the samples to pull out of the VariantContext
* @return a new VariantContext with just the requested samples * @return a new VariantContext with just the requested samples
*/ */
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) { public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles()); VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles());
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes()); HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());

View File

@ -212,15 +212,15 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
// create new Genotype objects // create new Genotype objects
GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples()); GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples());
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) { for ( final Genotype genotype : vc.getGenotypes() ) {
List<Allele> newAlleles = new ArrayList<Allele>(); List<Allele> newAlleles = new ArrayList<Allele>();
for ( Allele allele : genotype.getValue().getAlleles() ) { for ( Allele allele : genotype.getAlleles() ) {
Allele newA = alleleMap.get(allele); Allele newA = alleleMap.get(allele);
if ( newA == null ) if ( newA == null )
newA = Allele.NO_CALL; newA = Allele.NO_CALL;
newAlleles.add(newA); newAlleles.add(newA);
} }
newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles)); newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
} }
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel); return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel);

View File

@ -557,7 +557,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
// check if we find it in the variant rod // check if we find it in the variant rod
GenotypeCollection genotypes = vc.getGenotypes(samples); GenotypeCollection genotypes = vc.getGenotypes(samples);
for (Genotype g : genotypes.values()) { for (final Genotype g : genotypes) {
if (sampleHasVariant(g)) { if (sampleHasVariant(g)) {
// There is a variant called (or filtered with not exclude filtered option set) that is not HomRef for at least one of the samples. // There is a variant called (or filtered with not exclude filtered option set) that is not HomRef for at least one of the samples.
if (compVCs == null) if (compVCs == null)

View File

@ -130,8 +130,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
// set the appropriate sample name if necessary // set the appropriate sample name if necessary
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName); Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
GenotypeCollection genotypes = GenotypeCollection.create(1); GenotypeCollection genotypes = GenotypeCollection.create(g);
genotypes.put(sampleName, g);
vc = VariantContext.modifyGenotypes(vc, genotypes); vc = VariantContext.modifyGenotypes(vc, genotypes);
} }

View File

@ -451,7 +451,7 @@ public class StandardVCFWriter extends IndexingVCFWriter {
boolean sawGoodGT = false; boolean sawGoodGT = false;
boolean sawGoodQual = false; boolean sawGoodQual = false;
boolean sawGenotypeFilter = false; boolean sawGenotypeFilter = false;
for ( Genotype g : vc.getGenotypes().values() ) { for ( final Genotype g : vc.getGenotypes() ) {
keys.addAll(g.getAttributes().keySet()); keys.addAll(g.getAttributes().keySet());
if ( g.isAvailable() ) if ( g.isAvailable() )
sawGoodGT = true; sawGoodGT = true;

View File

@ -180,7 +180,7 @@ public class VCF3Codec extends AbstractVCFCodec {
// add it to the list // add it to the list
try { try {
genotypes.put(sampleName, new Genotype(sampleName, genotypes.add(new Genotype(sampleName,
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap), parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
GTQual, GTQual,
genotypeFilters, genotypeFilters,

View File

@ -209,13 +209,7 @@ public class VCFCodec extends AbstractVCFCodec {
// add it to the list // add it to the list
try { try {
genotypes.put(sampleName, genotypes.add(new Genotype(sampleName, GTalleles, GTQual, genotypeFilters, gtAttributes, phased));
new Genotype(sampleName,
GTalleles,
GTQual,
genotypeFilters,
gtAttributes,
phased));
} catch (TribbleException e) { } catch (TribbleException e) {
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
} }

View File

@ -155,12 +155,12 @@ public class GCF {
if ( genotypes.isEmpty() ) if ( genotypes.isEmpty() )
return VariantContext.NO_GENOTYPES; return VariantContext.NO_GENOTYPES;
else { else {
GenotypeCollection map = GenotypeCollection.create(); GenotypeCollection map = GenotypeCollection.create(genotypes.size());
for ( int i = 0; i < genotypes.size(); i++ ) { for ( int i = 0; i < genotypes.size(); i++ ) {
final String sampleName = header.getSample(i); final String sampleName = header.getSample(i);
final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap); final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap);
map.put(sampleName, g); map.add(g);
} }
return map; return map;
@ -173,7 +173,7 @@ public class GCF {
List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes); List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes);
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null); for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
for ( Genotype g : vc.getGenotypes().values() ) { for ( Genotype g : vc.getGenotypes() ) {
int i = GCFHeaderBuilder.encodeSample(g.getSampleName()); int i = GCFHeaderBuilder.encodeSample(g.getSampleName());
genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g)); genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g));
} }

View File

@ -67,20 +67,28 @@ public class GenotypeCollection implements List<Genotype> {
} }
public static final GenotypeCollection create(final int nGenotypes) { public static final GenotypeCollection create(final int nGenotypes) {
return new GenotypeCollection(nGenotypes, true); return new GenotypeCollection(nGenotypes, false);
} }
// todo -- differentiate between empty constructor and copy constructor // todo -- differentiate between empty constructor and copy constructor
// todo -- create constructor (Genotype ... genotypes) // todo -- create constructor (Genotype ... genotypes)
public static final GenotypeCollection create(final ArrayList<Genotype> genotypes) { public static final GenotypeCollection create(final ArrayList<Genotype> genotypes) {
return new GenotypeCollection(genotypes, true); return new GenotypeCollection(genotypes, false);
}
public static final GenotypeCollection create(final Genotype... genotypes) {
return new GenotypeCollection(new ArrayList<Genotype>(Arrays.asList(genotypes)), false);
} }
public static final GenotypeCollection copy(final GenotypeCollection toCopy) { public static final GenotypeCollection copy(final GenotypeCollection toCopy) {
return create(toCopy.genotypes); return create(toCopy.genotypes);
} }
public static final GenotypeCollection copy(final Collection<Genotype> toCopy) {
return create(new ArrayList<Genotype>(toCopy));
}
// public static final GenotypeMap create(final Collection<Genotype> genotypes) { // public static final GenotypeMap create(final Collection<Genotype> genotypes) {
// if ( genotypes == null ) // if ( genotypes == null )
// return null; // todo -- really should return an empty map // return null; // todo -- really should return an empty map
@ -173,6 +181,12 @@ public class GenotypeCollection implements List<Genotype> {
return genotypes.add(genotype); return genotypes.add(genotype);
} }
public boolean add(final Genotype ... genotype) {
checkImmutability();
invalidateCaches();
return genotypes.addAll(Arrays.asList(genotype));
}
@Override @Override
public void add(final int i, final Genotype genotype) { public void add(final int i, final Genotype genotype) {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
@ -291,7 +305,7 @@ public class GenotypeCollection implements List<Genotype> {
return genotypes.toArray(ts); return genotypes.toArray(ts);
} }
public Iterable<Genotype> iterateInOrder(final Iterable<String> sampleNamesInOrder) { public Iterable<Genotype> iterateInSampleNameOrder(final Iterable<String> sampleNamesInOrder) {
return new Iterable<Genotype>() { return new Iterable<Genotype>() {
@Override @Override
public Iterator<Genotype> iterator() { public Iterator<Genotype> iterator() {
@ -300,6 +314,10 @@ public class GenotypeCollection implements List<Genotype> {
}; };
} }
public Iterable<Genotype> iterateInSampleNameOrder() {
return iterateInSampleNameOrder(getSampleNamesOrderedByName());
}
private final class InOrderIterator implements Iterator<Genotype> { private final class InOrderIterator implements Iterator<Genotype> {
final Iterator<String> sampleNamesInOrder; final Iterator<String> sampleNamesInOrder;
@ -322,4 +340,41 @@ public class GenotypeCollection implements List<Genotype> {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
} }
public Set<String> getSampleNames() {
buildCache();
return sampleNameToOffset.keySet();
}
public Set<String> getSampleNamesOrderedByName() {
return new TreeSet<String>(getSampleNames());
}
public boolean containsSample(final String sample) {
buildCache();
return sampleNameToOffset.containsKey(sample);
}
public boolean containsSamples(final Collection<String> samples) {
buildCache();
return getSampleNames().containsAll(samples);
}
public GenotypeCollection subsetToSamples( final Collection<String> samples ) {
return subsetToSamples(new HashSet<String>(samples));
}
public GenotypeCollection subsetToSamples( final Set<String> samples ) {
if ( samples.size() == genotypes.size() )
return this;
else if ( samples.isEmpty() )
return NO_GENOTYPES;
else {
GenotypeCollection subset = create(samples.size());
for ( final Genotype g : genotypes )
if ( samples.contains(g.getSampleName()) )
subset.add(g);
return subset;
}
}
} }

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFParser; import org.broadinstitute.sting.utils.codecs.vcf.VCFParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.lang.reflect.Array;
import java.util.*; import java.util.*;
/** /**
@ -279,7 +280,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*/ */
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, Object> attributes) { public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
this(source, contig, start, stop, alleles, this(source, contig, start, stop, alleles,
GenotypeCollection.create(genotypes), GenotypeCollection.copy(genotypes),
negLog10PError, filters, attributes, null, false); negLog10PError, filters, attributes, null, false);
} }
@ -423,58 +424,73 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// //
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
/** // /**
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype // * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype
* genotype and alleles in genotype. This is the right way to test if a single genotype is actually // * genotype and alleles in genotype. This is the right way to test if a single genotype is actually
* variant or not. // * variant or not.
* // *
* @param genotype genotype // * @param genotype genotype
* @return vc subcontext // * @return vc subcontext
*/ // * @deprecated replaced by {@link #subContextFromSample(String)}
public VariantContext subContextFromGenotypes(Genotype genotype) { // */
return subContextFromGenotypes(Arrays.asList(genotype)); // public VariantContext subContextFromGenotypes(Genotype genotype) {
} // return subContextFromGenotypes(Arrays.asList(genotype));
// }
//
//
// /**
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
// * genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
// * variant or not.
// *
// * @param genotypes genotypes
// * @return vc subcontext
// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection)}
// */
// public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
// return subContextFromGenotypes(genotypes, allelesOfGenotypes(genotypes)) ;
// }
//
// /**
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
// * genotypes. Also, the resulting variant context will contain the alleles provided, not only those found in genotypes
// *
// * @param genotypes genotypes
// * @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more
// * @return vc subcontext
// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection, java.util.Collection)}
// */
// @Deprecated
// public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Collection<Allele> alleles) {
// return new VariantContext(getSource(), contig, start, stop, alleles,
// GenotypeCollection.create(genotypes),
// getNegLog10PError(),
// filtersWereApplied() ? getFilters() : null,
// getAttributes(),
// getReferenceBaseForIndel());
// }
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
/**
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
* genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
* variant or not.
*
* @param genotypes genotypes
* @return vc subcontext
*/
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
return subContextFromGenotypes(genotypes, allelesOfGenotypes(genotypes)) ;
}
/**
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
* genotypes. Also, the resulting variant context will contain the alleles provided, not only those found in genotypes
*
* @param genotypes genotypes
* @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more
* @return vc subcontext
*/
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Collection<Allele> alleles) {
return new VariantContext(getSource(), contig, start, stop, alleles, return new VariantContext(getSource(), contig, start, stop, alleles,
GenotypeCollection.create(genotypes), genotypes.subsetToSamples(sampleNames),
getNegLog10PError(), getNegLog10PError(),
filtersWereApplied() ? getFilters() : null, filtersWereApplied() ? getFilters() : null,
getAttributes(), getAttributes(),
getReferenceBaseForIndel()); getReferenceBaseForIndel());
} }
public VariantContext subContextFromSamples(Collection<String> sampleNames, Collection<Allele> alleles) { public VariantContext subContextFromSamples(Set<String> sampleNames) {
return subContextFromGenotypes(getGenotypes(sampleNames).values(), alleles); GenotypeCollection newGenotypes = genotypes.subsetToSamples(sampleNames);
} return new VariantContext(getSource(), contig, start, stop, allelesOfGenotypes(newGenotypes),
newGenotypes,
public VariantContext subContextFromSamples(Collection<String> sampleNames) { getNegLog10PError(),
return subContextFromGenotypes(getGenotypes(sampleNames).values()); filtersWereApplied() ? getFilters() : null,
getAttributes(),
getReferenceBaseForIndel());
} }
public VariantContext subContextFromSample(String sampleName) { public VariantContext subContextFromSample(String sampleName) {
return subContextFromGenotypes(getGenotype(sampleName)); return subContextFromSamples(new HashSet<String>(Arrays.asList(sampleName)));
} }
/** /**
@ -875,16 +891,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*/ */
public boolean hasGenotypes() { public boolean hasGenotypes() {
loadGenotypes(); loadGenotypes();
return genotypes.size() > 0; return ! genotypes.isEmpty();
} }
public boolean hasGenotypes(Collection<String> sampleNames) { public boolean hasGenotypes(Collection<String> sampleNames) {
loadGenotypes(); loadGenotypes();
for ( String name : sampleNames ) { return genotypes.containsSamples(sampleNames);
if ( ! genotypes.containsKey(name) )
return false;
}
return true;
} }
/** /**
@ -895,10 +907,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return genotypes; return genotypes;
} }
public List<Genotype> getGenotypesSortedByName() { public Iterable<Genotype> getGenotypesSortedByName() {
loadGenotypes(); loadGenotypes();
Collection<Genotype> types = new TreeMap<String,Genotype>(genotypes).values(); return genotypes.iterateInSampleNameOrder();
return new ArrayList<Genotype>(types);
} }
/** /**
@ -922,24 +933,23 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @throws IllegalArgumentException if sampleName isn't bound to a genotype * @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/ */
public GenotypeCollection getGenotypes(Collection<String> sampleNames) { public GenotypeCollection getGenotypes(Collection<String> sampleNames) {
GenotypeCollection map = GenotypeCollection.create(sampleNames.size()); return getGenotypes().subsetToSamples(sampleNames);
for ( String name : sampleNames ) {
if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames);
final Genotype g = getGenotype(name);
if ( g != null ) {
map.put(name, g);
}
} }
return map; public GenotypeCollection getGenotypes(Set<String> sampleNames) {
return getGenotypes().subsetToSamples(sampleNames);
} }
/** /**
* @return the set of all sample names in this context * @return the set of all sample names in this context, not ordered
*/ */
public Set<String> getSampleNames() { public Set<String> getSampleNames() {
return getGenotypes().keySet(); return getGenotypes().getSampleNames();
}
public Set<String> getSampleNamesOrderedByName() {
return getGenotypes().getSampleNamesOrderedByName();
} }
/** /**
@ -952,11 +962,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
} }
public boolean hasGenotype(String sample) { public boolean hasGenotype(String sample) {
return getGenotypes().containsKey(sample); return getGenotypes().containsSample(sample);
} }
public Genotype getGenotype(int ith) { public Genotype getGenotype(int ith) {
return getGenotypesSortedByName().get(ith); return genotypes.get(ith);
} }
@ -968,7 +978,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
public int getChromosomeCount() { public int getChromosomeCount() {
int n = 0; int n = 0;
for ( Genotype g : getGenotypes().values() ) { for ( final Genotype g : getGenotypes() ) {
n += g.isNoCall() ? 0 : g.getPloidy(); n += g.isNoCall() ? 0 : g.getPloidy();
} }
@ -984,7 +994,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
public int getChromosomeCount(Allele a) { public int getChromosomeCount(Allele a) {
int n = 0; int n = 0;
for ( Genotype g : getGenotypes().values() ) { for ( final Genotype g : getGenotypes() ) {
n += g.getAlleles(a).size(); n += g.getAlleles(a).size();
} }
@ -1015,7 +1025,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
if ( genotypeCounts == null ) { if ( genotypeCounts == null ) {
genotypeCounts = new int[Genotype.Type.values().length]; genotypeCounts = new int[Genotype.Type.values().length];
for ( Genotype g : getGenotypes().values() ) { for ( final Genotype g : getGenotypes() ) {
if ( g.isNoCall() ) if ( g.isNoCall() )
genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++; genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++;
else if ( g.isHomRef() ) else if ( g.isHomRef() )
@ -1136,7 +1146,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
List<Allele> reportedAlleles = getAlleles(); List<Allele> reportedAlleles = getAlleles();
Set<Allele> observedAlleles = new HashSet<Allele>(); Set<Allele> observedAlleles = new HashSet<Allele>();
observedAlleles.add(getReference()); observedAlleles.add(getReference());
for ( Genotype g : getGenotypes().values() ) { for ( final Genotype g : getGenotypes() ) {
if ( g.isCalled() ) if ( g.isCalled() )
observedAlleles.addAll(g.getAlleles()); observedAlleles.addAll(g.getAlleles());
} }
@ -1285,12 +1295,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
private void validateGenotypes() { private void validateGenotypes() {
if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null"); if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null");
for ( Map.Entry<String, Genotype> elt : this.genotypes.entrySet() ) { for ( final Genotype g : this.genotypes ) {
String name = elt.getKey();
Genotype g = elt.getValue();
if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName());
if ( g.isAvailable() ) { if ( g.isAvailable() ) {
for ( Allele gAllele : g.getAlleles() ) { for ( Allele gAllele : g.getAlleles() ) {
if ( ! hasAllele(gAllele) && gAllele.isCalled() ) if ( ! hasAllele(gAllele) && gAllele.isCalled() )
@ -1465,8 +1470,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
Byte refByte = inputVC.getReferenceBaseForIndel(); Byte refByte = inputVC.getReferenceBaseForIndel();
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
GenotypeCollection genotypes = GenotypeCollection.create();
GenotypeCollection inputGenotypes = inputVC.getGenotypes();
for (Allele a : inputVC.getAlleles()) { for (Allele a : inputVC.getAlleles()) {
// get bases for current allele and create a new one with trimmed bases // get bases for current allele and create a new one with trimmed bases
@ -1483,11 +1486,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
} }
// now we can recreate new genotypes with trimmed alleles // now we can recreate new genotypes with trimmed alleles
for (String sample : inputVC.getSampleNames()) { GenotypeCollection genotypes = GenotypeCollection.create(inputVC.getNSamples());
Genotype g = inputGenotypes.get(sample); for (final Genotype g : inputVC.getGenotypes() ) {
List<Allele> inAlleles = g.getAlleles(); List<Allele> inAlleles = g.getAlleles();
List<Allele> newGenotypeAlleles = new ArrayList<Allele>(); List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
for (Allele a : inAlleles) { for (Allele a : inAlleles) {
if (a.isCalled()) { if (a.isCalled()) {
if (a.isSymbolic()) { if (a.isSymbolic()) {
@ -1506,8 +1508,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
newGenotypeAlleles.add(Allele.NO_CALL); newGenotypeAlleles.add(Allele.NO_CALL);
} }
} }
genotypes.put(sample, new Genotype(sample, newGenotypeAlleles, g.getNegLog10PError(), genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getNegLog10PError(),
g.getFilters(),g.getAttributes(),g.isPhased())); g.getFilters(), g.getAttributes(), g.isPhased()));
} }
@ -1520,48 +1522,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
} }
public ArrayList<Allele> getTwoAllelesWithHighestAlleleCounts() {
// first idea: get two alleles with highest AC
int maxAC1 = 0, maxAC2=0,maxAC1ind =0, maxAC2ind = 0;
int i=0;
int[] alleleCounts = new int[this.getAlleles().size()];
ArrayList<Allele> alleleArray = new ArrayList<Allele>();
for (Allele a:this.getAlleles()) {
int ac = this.getChromosomeCount(a);
if (ac >=maxAC1) {
maxAC1 = ac;
maxAC1ind = i;
}
alleleArray.add(a);
alleleCounts[i++] = ac;
}
// now get second best allele
for (i=0; i < alleleCounts.length; i++) {
if (i == maxAC1ind)
continue;
if (alleleCounts[i] >= maxAC2) {
maxAC2 = alleleCounts[i];
maxAC2ind = i;
}
}
Allele alleleA, alleleB;
if (alleleArray.get(maxAC1ind).isReference()) {
alleleA = alleleArray.get(maxAC1ind);
alleleB = alleleArray.get(maxAC2ind);
}
else if (alleleArray.get(maxAC2ind).isReference()) {
alleleA = alleleArray.get(maxAC2ind);
alleleB = alleleArray.get(maxAC1ind);
} else {
alleleA = alleleArray.get(maxAC1ind);
alleleB = alleleArray.get(maxAC2ind);
}
ArrayList<Allele> a = new ArrayList<Allele>();
a.add(alleleA);
a.add(alleleB);
return a;
}
public Allele getAltAlleleWithHighestAlleleCount() { public Allele getAltAlleleWithHighestAlleleCount() {
// first idea: get two alleles with highest AC // first idea: get two alleles with highest AC
Allele best = null; Allele best = null;

View File

@ -70,7 +70,7 @@ public class VariantContextUtils {
* @return VariantContext object * @return VariantContext object
*/ */
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, Object> attributes) { public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, GenotypeCollection.create(genotypes), negLog10PError, filters, attributes); return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, GenotypeCollection.copy(genotypes), negLog10PError, filters, attributes);
} }
/** /**
@ -351,10 +351,9 @@ public class VariantContextUtils {
// Genotypes // Genotypes
final GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples()); final GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes().values() ) { for ( final Genotype g : vc.getGenotypes() ) {
Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve); Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
genotypes.put(g.getSampleName(), genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(),
new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(),
genotypeAttributes, g.isPhased())); genotypeAttributes, g.isPhased()));
} }
@ -682,9 +681,9 @@ public class VariantContextUtils {
if (!hasNullAlleles) if (!hasNullAlleles)
return inputVC; return inputVC;
// now we can recreate new genotypes with trimmed alleles // now we can recreate new genotypes with trimmed alleles
for ( Map.Entry<String, Genotype> sample : inputVC.getGenotypes().entrySet() ) { for ( final Genotype genotype : inputVC.getGenotypes() ) {
List<Allele> originalAlleles = sample.getValue().getAlleles(); List<Allele> originalAlleles = genotype.getAlleles();
List<Allele> trimmedAlleles = new ArrayList<Allele>(); List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( Allele a : originalAlleles ) { for ( Allele a : originalAlleles ) {
if ( a.isCalled() ) if ( a.isCalled() )
@ -692,7 +691,7 @@ public class VariantContextUtils {
else else
trimmedAlleles.add(Allele.NO_CALL); trimmedAlleles.add(Allele.NO_CALL);
} }
genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles)); genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
} }
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0])); return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0]));
@ -705,8 +704,8 @@ public class VariantContextUtils {
public static GenotypeCollection stripPLs(GenotypeCollection genotypes) { public static GenotypeCollection stripPLs(GenotypeCollection genotypes) {
GenotypeCollection newGs = GenotypeCollection.create(genotypes.size()); GenotypeCollection newGs = GenotypeCollection.create(genotypes.size());
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) { for ( final Genotype g : genotypes ) {
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue()); newGs.add(g.hasLikelihoods() ? removePLs(g) : g);
} }
return newGs; return newGs;
@ -884,9 +883,9 @@ public class VariantContextUtils {
} }
private static void mergeGenotypes(GenotypeCollection mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { private static void mergeGenotypes(GenotypeCollection mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
for ( Genotype g : oneVC.getGenotypes().values() ) { for ( Genotype g : oneVC.getGenotypes() ) {
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
if ( ! mergedGenotypes.containsKey(name) ) { if ( ! mergedGenotypes.containsSample(name) ) {
// only add if the name is new // only add if the name is new
Genotype newG = g; Genotype newG = g;
@ -895,7 +894,7 @@ public class VariantContextUtils {
newG = new Genotype(name, alleles, g.getNegLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased()); newG = new Genotype(name, alleles, g.getNegLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased());
} }
mergedGenotypes.put(name, newG); mergedGenotypes.add(newG);
} }
} }
} }
@ -924,15 +923,15 @@ public class VariantContextUtils {
// create new Genotype objects // create new Genotype objects
GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples()); GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples());
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) { for ( final Genotype genotype : vc.getGenotypes() ) {
List<Allele> newAlleles = new ArrayList<Allele>(); List<Allele> newAlleles = new ArrayList<Allele>();
for ( Allele allele : genotype.getValue().getAlleles() ) { for ( Allele allele : genotype.getAlleles() ) {
Allele newAllele = alleleMap.get(allele); Allele newAllele = alleleMap.get(allele);
if ( newAllele == null ) if ( newAllele == null )
newAllele = Allele.NO_CALL; newAllele = Allele.NO_CALL;
newAlleles.add(newAllele); newAlleles.add(newAllele);
} }
newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles)); newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
} }
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
@ -944,13 +943,13 @@ public class VariantContextUtils {
return vc; return vc;
GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples()); GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples());
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) { for ( final Genotype genotype : vc.getGenotypes() ) {
Map<String, Object> attrs = new HashMap<String, Object>(); Map<String, Object> attrs = new HashMap<String, Object>();
for ( Map.Entry<String, Object> attr : genotype.getValue().getAttributes().entrySet() ) { for ( Map.Entry<String, Object> attr : genotype.getAttributes().entrySet() ) {
if ( allowedAttributes.contains(attr.getKey()) ) if ( allowedAttributes.contains(attr.getKey()) )
attrs.put(attr.getKey(), attr.getValue()); attrs.put(attr.getKey(), attr.getValue());
} }
newGenotypes.put(genotype.getKey(), Genotype.modifyAttributes(genotype.getValue(), attrs)); newGenotypes.add(Genotype.modifyAttributes(genotype, attrs));
} }
return VariantContext.modifyGenotypes(vc, newGenotypes); return VariantContext.modifyGenotypes(vc, newGenotypes);
@ -1023,10 +1022,8 @@ public class VariantContextUtils {
MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added
GenotypeCollection mergedGenotypes = GenotypeCollection.create(); GenotypeCollection mergedGenotypes = GenotypeCollection.create();
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) { for (final Genotype gt1 : vc1.getGenotypes()) {
String sample = gt1Entry.getKey(); Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
Genotype gt1 = gt1Entry.getValue();
Genotype gt2 = vc2.getGenotype(sample);
List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site1Alleles = gt1.getAlleles();
List<Allele> site2Alleles = gt2.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles();
@ -1052,8 +1049,8 @@ public class VariantContextUtils {
if (phaseQual.PQ != null) if (phaseQual.PQ != null)
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
Genotype mergedGt = new Genotype(sample, mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
mergedGenotypes.put(sample, mergedGt); mergedGenotypes.add(mergedGt);
} }
String mergedName = VariantContextUtils.mergeVariantContextNames(vc1.getSource(), vc2.getSource()); String mergedName = VariantContextUtils.mergeVariantContextNames(vc1.getSource(), vc2.getSource());
@ -1197,8 +1194,7 @@ public class VariantContextUtils {
} }
private static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) { private static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) {
for (Map.Entry<String, Genotype> gtEntry : vc.getGenotypes().entrySet()) { for (final Genotype gt : vc.getGenotypes()) {
Genotype gt = gtEntry.getValue();
if (gt.isNoCall() || gt.isFiltered()) if (gt.isNoCall() || gt.isFiltered())
return false; return false;
} }
@ -1210,10 +1206,8 @@ public class VariantContextUtils {
private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) { for (final Genotype gt1 : vc1.getGenotypes()) {
String sample = gt1Entry.getKey(); Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
Genotype gt1 = gt1Entry.getValue();
Genotype gt2 = vc2.getGenotype(sample);
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
return false; return false;
@ -1275,10 +1269,8 @@ public class VariantContextUtils {
*/ */
public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) { for (final Genotype gt1 : vc1.getGenotypes()) {
String sample = gt1Entry.getKey(); Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
Genotype gt1 = gt1Entry.getValue();
Genotype gt2 = vc2.getGenotype(sample);
List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site1Alleles = gt1.getAlleles();
List<Allele> site2Alleles = gt2.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles();
@ -1309,10 +1301,8 @@ public class VariantContextUtils {
allele2ToAllele1.put(vc2.getReference(), vc1.getReference()); allele2ToAllele1.put(vc2.getReference(), vc1.getReference());
// Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples). // Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples).
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) { for (final Genotype gt1 : vc1.getGenotypes()) {
String sample = gt1Entry.getKey(); Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
Genotype gt1 = gt1Entry.getValue();
Genotype gt2 = vc2.getGenotype(sample);
List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site1Alleles = gt1.getAlleles();
List<Allele> site2Alleles = gt2.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles();

View File

@ -37,13 +37,14 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Arrays; import java.util.*;
import java.util.EnumSet;
import java.util.List;
/** /**
* Test routine for new VariantContext object * Test routine for new VariantContext object
@ -93,7 +94,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
if ( writer != null && n == 0 ) { if ( writer != null && n == 0 ) {
if ( ! wroteHeader ) { if ( ! wroteHeader ) {
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc)); writer.writeHeader(createVCFHeader(vc));
wroteHeader = true; wroteHeader = true;
} }
@ -115,6 +116,10 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
} }
} }
private static VCFHeader createVCFHeader(VariantContext vc) {
return new VCFHeader(new HashSet<VCFHeaderLine>(), vc.getGenotypes().getSampleNamesSorted());
}
public Integer reduceInit() { public Integer reduceInit() {
return 0; return 0;
} }

View File

@ -122,7 +122,7 @@ public class VCFWriterUnitTest extends BaseTest {
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
Set<String> filters = null; Set<String> filters = null;
Map<String, Object> attributes = new HashMap<String,Object>(); Map<String, Object> attributes = new HashMap<String,Object>();
GenotypeCollection genotypes = GenotypeCollection.create(); GenotypeCollection genotypes = GenotypeCollection.create(header.getGenotypeSamples().size());
alleles.add(Allele.create("-",true)); alleles.add(Allele.create("-",true));
alleles.add(Allele.create("CC",false)); alleles.add(Allele.create("CC",false));
@ -133,7 +133,7 @@ public class VCFWriterUnitTest extends BaseTest {
gtattributes.put("BB","1"); gtattributes.put("BB","1");
Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true); Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true);
genotypes.put(name,gt); genotypes.add(gt);
} }
return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A'); return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A');

View File

@ -99,7 +99,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
int start = 10; int start = 10;
int stop = start; // alleles.contains(ATC) ? start + 3 : start; int stop = start; // alleles.contains(ATC) ? start + 3 : start;
return new VariantContext(source, "1", start, stop, alleles, return new VariantContext(source, "1", start, stop, alleles,
GenotypeCollection.create(genotypes), 1.0, filters, null, Cref.getBases()[0]); GenotypeCollection.copy(genotypes), 1.0, filters, null, Cref.getBases()[0]);
} }
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
@ -509,7 +509,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
} }
// necessary to not overload equals for genotypes // necessary to not overload equals for genotypes
private void assertGenotypesAreMostlyEqual(Map<String, Genotype> actual, Map<String, Genotype> expected) { private void assertGenotypesAreMostlyEqual(GenotypeCollection actual, GenotypeCollection expected) {
if (actual == expected) { if (actual == expected) {
return; return;
} }
@ -522,10 +522,8 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size()); Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
} }
for (Map.Entry<String, Genotype> entry : actual.entrySet()) { for (Genotype value : actual) {
String key = entry.getKey(); Genotype expectedValue = expected.get(value.getSampleName());
Genotype value = entry.getValue();
Genotype expectedValue = expected.get(key);
Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal"); Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal");
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError(), "GQ values aren't equal"); Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError(), "GQ values aren't equal");
@ -545,7 +543,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
// test genotypes // test genotypes
Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet<String>(Arrays.asList("s1.1", "s1.2"))); Assert.assertEquals(merged.getSampleNames(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
} }
@Test(expectedExceptions = UserException.class) @Test(expectedExceptions = UserException.class)