GenotypeMap -> GenotypeCollection part 2
-- Code actually builds
This commit is contained in:
parent
2e9d5363e7
commit
f0234ab67f
|
|
@ -202,15 +202,6 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
}
|
||||
|
||||
public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) {
|
||||
HashSet<String> names = new LinkedHashSet<String>();
|
||||
for ( Genotype g : vc.getGenotypesSortedByName() ) {
|
||||
names.add(g.getSampleName());
|
||||
}
|
||||
|
||||
return new VCFHeader(hInfo == null ? new HashSet<VCFHeaderLine>() : hInfo, names);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// GELI to VariantContext
|
||||
|
|
@ -353,7 +344,7 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
|
||||
Genotype g = new Genotype(samples[i], myAlleles);
|
||||
genotypes.put(samples[i], g);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
HashMap<String, Object> attrs = new HashMap<String, Object>(1);
|
||||
|
|
|
|||
|
|
@ -89,9 +89,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
|
||||
if (haplotypes != null) {
|
||||
final Set<Map.Entry<String, Genotype>> genotypes = vc.getGenotypes().entrySet();
|
||||
for ( final Map.Entry<String, Genotype> genotype : genotypes ) {
|
||||
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getKey());
|
||||
for ( final Genotype genotype : vc.getGenotypes()) {
|
||||
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( thisContext != null ) {
|
||||
final ReadBackedPileup thisPileup;
|
||||
if (thisContext.hasExtendedEventPileup())
|
||||
|
|
|
|||
|
|
@ -52,8 +52,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
|||
double hetCount = 0.0;
|
||||
double homCount = 0.0;
|
||||
int N = 0; // number of samples that have likelihoods
|
||||
for ( final Map.Entry<String, Genotype> genotypeMap : genotypes.entrySet() ) {
|
||||
Genotype g = genotypeMap.getValue();
|
||||
for ( final Genotype g : genotypes ) {
|
||||
if ( g.isNoCall() || !g.hasLikelihoods() )
|
||||
continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -35,13 +35,13 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
|
||||
int depth = 0;
|
||||
|
||||
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
for ( final Genotype genotype : genotypes ) {
|
||||
|
||||
// we care only about variant calls with likelihoods
|
||||
if ( genotype.getValue().isHomRef() )
|
||||
if ( genotype.isHomRef() )
|
||||
continue;
|
||||
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -43,8 +43,8 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
|
||||
if (vc.isSNP() && vc.isBiallelic()) {
|
||||
// todo - no current support for multiallelic snps
|
||||
for ( final Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||
for ( final Genotype genotype : genotypes ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null ) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -53,8 +53,8 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
}
|
||||
else if (vc.isIndel() || vc.isMixed()) {
|
||||
|
||||
for ( final Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||
for ( final Genotype genotype : genotypes ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null ) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -229,11 +229,10 @@ public class VariantAnnotatorEngine {
|
|||
return vc.getGenotypes();
|
||||
|
||||
GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples());
|
||||
for ( Map.Entry<String, Genotype> g : vc.getGenotypes().entrySet() ) {
|
||||
Genotype genotype = g.getValue();
|
||||
AlignmentContext context = stratifiedContexts.get(g.getKey());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null ) {
|
||||
genotypes.put(g.getKey(), genotype);
|
||||
genotypes.add(genotype);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -243,7 +242,7 @@ public class VariantAnnotatorEngine {
|
|||
if ( result != null )
|
||||
genotypeAnnotations.putAll(result);
|
||||
}
|
||||
genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
|
|
|
|||
|
|
@ -202,9 +202,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
hapmapGenotypes = vc_comp.getGenotypes();
|
||||
}
|
||||
|
||||
for ( Map.Entry<String, Genotype> originalGenotypes : vc_input.getGenotypes().entrySet() ) {
|
||||
|
||||
Genotype g = originalGenotypes.getValue();
|
||||
for ( final Genotype g : vc_input.getGenotypes() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
|
||||
boolean genotypeIsPhased = true;
|
||||
|
|
@ -214,7 +212,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
// use sample as key into genotypes structure
|
||||
if (vc_comp != null) {
|
||||
|
||||
if (vc_input.getGenotypes().containsKey(sample) && hapmapGenotypes.containsKey(sample)) {
|
||||
if (vc_input.getGenotypes().containsSample(sample) && hapmapGenotypes.containsSample(sample)) {
|
||||
|
||||
Genotype hapmapGenotype = hapmapGenotypes.get(sample);
|
||||
if (hapmapGenotype.isCalled()){
|
||||
|
|
@ -325,13 +323,12 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else {
|
||||
originalAttributes.put("OG",".");
|
||||
}
|
||||
Genotype imputedGenotype = new Genotype(originalGenotypes.getKey(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased);
|
||||
Genotype imputedGenotype = new Genotype(g.getSampleName(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased);
|
||||
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
|
||||
beagleVarCounts++;
|
||||
}
|
||||
|
||||
genotypes.put(originalGenotypes.getKey(), imputedGenotype);
|
||||
|
||||
genotypes.add(imputedGenotype);
|
||||
}
|
||||
|
||||
VariantContext filteredVC;
|
||||
|
|
|
|||
|
|
@ -39,10 +39,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -245,18 +242,18 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
if ( markers != null ) markers.append("\n");
|
||||
|
||||
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
|
||||
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
|
||||
GenotypeCollection preferredGenotypes = preferredVC.getGenotypes();
|
||||
GenotypeCollection otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
|
||||
for ( String sample : samples ) {
|
||||
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
|
||||
|
||||
Genotype genotype;
|
||||
boolean isValidation;
|
||||
// use sample as key into genotypes structure
|
||||
if ( preferredGenotypes.keySet().contains(sample) ) {
|
||||
if ( preferredGenotypes.containsSample(sample) ) {
|
||||
genotype = preferredGenotypes.get(sample);
|
||||
isValidation = isValidationSite;
|
||||
} else if ( otherGenotypes != null && otherGenotypes.keySet().contains(sample) ) {
|
||||
} else if ( otherGenotypes != null && otherGenotypes.containsSample(sample) ) {
|
||||
genotype = otherGenotypes.get(sample);
|
||||
isValidation = ! isValidationSite;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
vcRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
||||
for (Genotype g : vc.getGenotypes().values() ) {
|
||||
for (Genotype g : vc.getGenotypes() ) {
|
||||
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
|
||||
gRoot.add("GT", g.getGenotypeString());
|
||||
gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 );
|
||||
|
|
|
|||
|
|
@ -290,10 +290,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
genotypes = GenotypeCollection.create(vc.getGenotypes().size());
|
||||
|
||||
// for each genotype, check filters then create a new object
|
||||
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
|
||||
|
||||
Genotype g = genotype.getValue();
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.isCalled() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
|
||||
|
|
@ -301,9 +298,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
if ( VariantContextUtils.match(vc, g, exp) )
|
||||
filters.add(exp.name);
|
||||
}
|
||||
genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased()));
|
||||
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased()));
|
||||
} else {
|
||||
genotypes.put(genotype.getKey(), g);
|
||||
genotypes.add(g);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,8 +45,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
public enum Model {
|
||||
/** The default model with the best performance in all cases */
|
||||
EXACT,
|
||||
/** For posterity we have kept around the older GRID_SEARCH model, but this gives inferior results and shouldn't be used. */
|
||||
GRID_SEARCH
|
||||
}
|
||||
|
||||
protected int N;
|
||||
|
|
@ -71,7 +69,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
* @param log10AlleleFrequencyPriors priors
|
||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
|
||||
*/
|
||||
protected abstract void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> Alleles,
|
||||
protected abstract void getLog10PNonRef(GenotypeCollection GLs, List<Allele> Alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors);
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
super(UAC, N, logger, verboseWriter);
|
||||
}
|
||||
|
||||
public void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> alleles,
|
||||
public void getLog10PNonRef(GenotypeCollection GLs, List<Allele> alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors) {
|
||||
final int numAlleles = alleles.size();
|
||||
|
|
@ -94,11 +94,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
}
|
||||
|
||||
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
|
||||
private static final ArrayList<double[]> getGLs(GenotypeCollection GLs) {
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
|
||||
|
||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||
for ( Genotype sample : GLs.values() ) {
|
||||
for ( Genotype sample : GLs ) {
|
||||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
|
|
@ -154,7 +154,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
}
|
||||
|
||||
public int linearExact(Map<String, Genotype> GLs,
|
||||
public int linearExact(GenotypeCollection GLs,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||
|
|
@ -290,16 +290,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// todo = can't deal with optimal dynamic programming solution with multiallelic records
|
||||
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
||||
sampleIndices.addAll(GLs.keySet());
|
||||
sampleIndices.addAll(GLs.getSampleNames());
|
||||
sampleIdx = GLs.size();
|
||||
}
|
||||
else {
|
||||
|
||||
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
||||
if ( !sample.getValue().hasLikelihoods() )
|
||||
for ( final Genotype genotype : GLs ) {
|
||||
if ( !genotype.hasLikelihoods() )
|
||||
continue;
|
||||
|
||||
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
|
||||
double[] likelihoods = genotype.getLikelihoods().getAsVector();
|
||||
|
||||
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
|
||||
//System.out.print(sample.getKey()+":");
|
||||
|
|
@ -311,7 +311,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
continue;
|
||||
}
|
||||
|
||||
sampleIndices.add(sample.getKey());
|
||||
sampleIndices.add(genotype.getSampleName());
|
||||
|
||||
for (int k=0; k <= AFofMaxLikelihood; k++) {
|
||||
|
||||
|
|
@ -415,17 +415,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
|
||||
}
|
||||
//System.out.println(myAlleles.toString());
|
||||
calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
||||
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
||||
|
||||
}
|
||||
|
||||
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
||||
|
||||
if ( !sample.getValue().hasLikelihoods() )
|
||||
for ( final Genotype genotype : GLs ) {
|
||||
if ( !genotype.hasLikelihoods() )
|
||||
continue;
|
||||
Genotype g = GLs.get(sample.getKey());
|
||||
Genotype g = GLs.get(genotype.getSampleName());
|
||||
|
||||
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
|
||||
double[] likelihoods = genotype.getLikelihoods().getAsVector();
|
||||
|
||||
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
|
||||
continue; // regular likelihoods
|
||||
|
|
@ -436,7 +435,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
myAlleles.add(Allele.NO_CALL);
|
||||
myAlleles.add(Allele.NO_CALL);
|
||||
//System.out.println(myAlleles.toString());
|
||||
calls.put(sample.getKey(), new Genotype(sample.getKey(), myAlleles, qual, null, g.getAttributes(), false));
|
||||
calls.add(new Genotype(genotype.getSampleName(), myAlleles, qual, null, g.getAttributes(), false));
|
||||
}
|
||||
return calls;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,270 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeCollection;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
|
||||
|
||||
// for use in optimizing the P(D|AF) calculations:
|
||||
// how much off from the max likelihoods do we need to be before we can quit calculating?
|
||||
protected static final double LOG10_OPTIMIZATION_EPSILON = 8.0;
|
||||
|
||||
private AlleleFrequencyMatrix AFMatrix;
|
||||
|
||||
protected GridSearchAFEstimation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
AFMatrix = new AlleleFrequencyMatrix(N);
|
||||
}
|
||||
|
||||
protected void getLog10PNonRef(Map<String, Genotype> GLs, List<Allele> alleles,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[] log10AlleleFrequencyPosteriors) {
|
||||
initializeAFMatrix(GLs);
|
||||
|
||||
// first, calculate for AF=0 (no change to matrix)
|
||||
log10AlleleFrequencyPosteriors[0] = AFMatrix.getLikelihoodsOfFrequency() + log10AlleleFrequencyPriors[0];
|
||||
double maxLikelihoodSeen = log10AlleleFrequencyPosteriors[0];
|
||||
|
||||
int maxAlleleFrequencyToTest = AFMatrix.getSamples().size() * 2;
|
||||
|
||||
// for each minor allele frequency, calculate log10PofDgivenAFi
|
||||
for (int i = 1; i <= maxAlleleFrequencyToTest; i++) {
|
||||
// add one more alternate allele
|
||||
AFMatrix.incrementFrequency();
|
||||
|
||||
// calculate new likelihoods
|
||||
log10AlleleFrequencyPosteriors[i] = AFMatrix.getLikelihoodsOfFrequency() + log10AlleleFrequencyPriors[i];
|
||||
|
||||
// an optimization to speed up the calculation: if we are beyond the local maximum such
|
||||
// that subsequent likelihoods won't factor into the confidence score, just quit
|
||||
if ( maxLikelihoodSeen - log10AlleleFrequencyPosteriors[i] > LOG10_OPTIMIZATION_EPSILON )
|
||||
return;
|
||||
|
||||
if ( log10AlleleFrequencyPosteriors[i] > maxLikelihoodSeen )
|
||||
maxLikelihoodSeen = log10AlleleFrequencyPosteriors[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Overrides the super class
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param log10AlleleFrequencyPosteriors allele frequency results
|
||||
* @param AFofMaxLikelihood allele frequency of max likelihood
|
||||
*
|
||||
* @return calls
|
||||
*/
|
||||
protected GenotypeCollection assignGenotypes(VariantContext vc,
|
||||
double[] log10AlleleFrequencyPosteriors,
|
||||
int AFofMaxLikelihood) {
|
||||
if ( !vc.isVariant() )
|
||||
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
Allele refAllele = vc.getReference();
|
||||
Allele altAllele = vc.getAlternateAllele(0);
|
||||
GenotypeCollection calls = GenotypeCollection.create();
|
||||
|
||||
// first, the potential alt calls
|
||||
for ( String sample : AFMatrix.getSamples() ) {
|
||||
Genotype g = vc.getGenotype(sample);
|
||||
|
||||
// set the genotype and confidence
|
||||
Pair<Integer, Double> AFbasedGenotype = AFMatrix.getGenotype(AFofMaxLikelihood, sample);
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) {
|
||||
myAlleles.add(refAllele);
|
||||
myAlleles.add(refAllele);
|
||||
} else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) {
|
||||
myAlleles.add(refAllele);
|
||||
myAlleles.add(altAllele);
|
||||
} else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() )
|
||||
myAlleles.add(altAllele);
|
||||
myAlleles.add(altAllele);
|
||||
}
|
||||
|
||||
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, g.getAttributes(), false));
|
||||
}
|
||||
|
||||
return calls;
|
||||
}
|
||||
|
||||
private void initializeAFMatrix(Map<String, Genotype> GLs) {
|
||||
AFMatrix.clear();
|
||||
|
||||
for ( Genotype g : GLs.values() ) {
|
||||
if ( g.hasLikelihoods() )
|
||||
AFMatrix.setLikelihoods(g.getLikelihoods().getAsVector(), g.getSampleName());
|
||||
}
|
||||
}
|
||||
|
||||
protected static class AlleleFrequencyMatrix {
|
||||
|
||||
private double[][] matrix; // allele frequency matrix
|
||||
private int[] indexes; // matrix to maintain which genotype is active
|
||||
private int maxN; // total possible frequencies in data
|
||||
private int frequency; // current frequency
|
||||
|
||||
// data structures necessary to maintain a list of the best genotypes and their scores
|
||||
private ArrayList<String> samples = new ArrayList<String>();
|
||||
private HashMap<Integer, HashMap<String, Pair<Integer, Double>>> samplesToGenotypesPerAF = new HashMap<Integer, HashMap<String, Pair<Integer, Double>>>();
|
||||
|
||||
public AlleleFrequencyMatrix(int N) {
|
||||
maxN = N;
|
||||
matrix = new double[N][3];
|
||||
indexes = new int[N];
|
||||
clear();
|
||||
}
|
||||
|
||||
public List<String> getSamples() { return samples; }
|
||||
|
||||
public void clear() {
|
||||
frequency = 0;
|
||||
for (int i = 0; i < maxN; i++)
|
||||
indexes[i] = 0;
|
||||
samples.clear();
|
||||
samplesToGenotypesPerAF.clear();
|
||||
}
|
||||
|
||||
public void setLikelihoods(double[] GLs, String sample) {
|
||||
int index = samples.size();
|
||||
samples.add(sample);
|
||||
matrix[index][GenotypeType.AA.ordinal()] = GLs[0];
|
||||
matrix[index][GenotypeType.AB.ordinal()] = GLs[1];
|
||||
matrix[index][GenotypeType.BB.ordinal()] = GLs[2];
|
||||
}
|
||||
|
||||
public void incrementFrequency() {
|
||||
int N = samples.size();
|
||||
if ( frequency == 2 * N )
|
||||
throw new ReviewedStingException("Frequency was incremented past N; how is this possible?");
|
||||
frequency++;
|
||||
|
||||
double greedy = VALUE_NOT_CALCULATED;
|
||||
int greedyIndex = -1;
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
||||
if ( indexes[i] == GenotypeType.AB.ordinal() ) {
|
||||
if ( matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()] > greedy ) {
|
||||
greedy = matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()];
|
||||
greedyIndex = i;
|
||||
}
|
||||
}
|
||||
else if ( indexes[i] == GenotypeType.AA.ordinal() ) {
|
||||
if ( matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()] > greedy ) {
|
||||
greedy = matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()];
|
||||
greedyIndex = i;
|
||||
}
|
||||
// note that we currently don't bother with breaking ties between samples
|
||||
// (which would be done by looking at the HOM_VAR value) because it's highly
|
||||
// unlikely that a collision will both occur and that the difference will
|
||||
// be significant at HOM_VAR...
|
||||
}
|
||||
// if this person is already hom var, he can't add another alternate allele
|
||||
// so we can ignore that case
|
||||
}
|
||||
if ( greedyIndex == -1 )
|
||||
throw new ReviewedStingException("There is no best choice for a new alternate allele; how is this possible?");
|
||||
|
||||
if ( indexes[greedyIndex] == GenotypeType.AB.ordinal() )
|
||||
indexes[greedyIndex] = GenotypeType.BB.ordinal();
|
||||
else
|
||||
indexes[greedyIndex] = GenotypeType.AB.ordinal();
|
||||
}
|
||||
|
||||
public double getLikelihoodsOfFrequency() {
|
||||
double likelihoods = 0.0;
|
||||
int N = samples.size();
|
||||
for (int i = 0; i < N; i++)
|
||||
likelihoods += matrix[i][indexes[i]];
|
||||
|
||||
/*
|
||||
System.out.println(frequency);
|
||||
for (int i = 0; i < N; i++) {
|
||||
System.out.print(samples.get(i));
|
||||
for (int j=0; j < 3; j++) {
|
||||
System.out.print(String.valueOf(matrix[i][j]));
|
||||
System.out.print(indexes[i] == j ? "* " : " ");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
System.out.println(likelihoods);
|
||||
System.out.println();
|
||||
*/
|
||||
|
||||
recordGenotypes();
|
||||
|
||||
return likelihoods;
|
||||
}
|
||||
|
||||
public Pair<Integer, Double> getGenotype(int frequency, String sample) {
|
||||
return samplesToGenotypesPerAF.get(frequency).get(sample);
|
||||
}
|
||||
|
||||
private void recordGenotypes() {
|
||||
HashMap<String, Pair<Integer, Double>> samplesToGenotypes = new HashMap<String, Pair<Integer, Double>>();
|
||||
|
||||
int index = 0;
|
||||
for ( String sample : samples ) {
|
||||
int genotype = indexes[index];
|
||||
|
||||
double score;
|
||||
|
||||
int maxEntry = MathUtils.maxElementIndex(matrix[index]);
|
||||
// if the max value is for the most likely genotype, we can compute next vs. next best
|
||||
if ( genotype == maxEntry ) {
|
||||
if ( genotype == GenotypeType.AA.ordinal() )
|
||||
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AB.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
|
||||
else if ( genotype == GenotypeType.AB.ordinal() )
|
||||
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.BB.ordinal()]);
|
||||
else // ( genotype == GenotypeType.HOM.ordinal() )
|
||||
score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.AB.ordinal()]);
|
||||
}
|
||||
// otherwise, we need to calculate the probability of the genotype
|
||||
else {
|
||||
double[] normalized = MathUtils.normalizeFromLog10(matrix[index]);
|
||||
double chosenGenotype = normalized[genotype];
|
||||
score = -1.0 * Math.log10(1.0 - chosenGenotype);
|
||||
}
|
||||
|
||||
samplesToGenotypes.put(sample, new Pair<Integer, Double>(genotype, Math.abs(score)));
|
||||
index++;
|
||||
}
|
||||
|
||||
samplesToGenotypesPerAF.put(frequency, samplesToGenotypes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -133,7 +133,7 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
|
|||
for ( VariantContext vc : VCs ) {
|
||||
if ( variantVC == null && vc.isVariant() )
|
||||
variantVC = vc;
|
||||
genotypes.putAll(getGenotypesWithGLs(vc.getGenotypes()));
|
||||
genotypes.addAll(getGenotypesWithGLs(vc.getGenotypes()));
|
||||
}
|
||||
|
||||
if ( variantVC == null ) {
|
||||
|
|
@ -143,13 +143,12 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
|
|||
return new VariantContext("VCwithGLs", variantVC.getChr(), variantVC.getStart(), variantVC.getEnd(), variantVC.getAlleles(), genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
}
|
||||
|
||||
private static Map<String, Genotype> getGenotypesWithGLs(Map<String, Genotype> genotypes) {
|
||||
Map<String, Genotype> genotypesWithGLs = new HashMap<String, Genotype>();
|
||||
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
|
||||
if ( g.getValue().hasLikelihoods() && g.getValue().getLikelihoods().getAsVector() != null )
|
||||
genotypesWithGLs.put(g.getKey(), g.getValue());
|
||||
private static GenotypeCollection getGenotypesWithGLs(GenotypeCollection genotypes) {
|
||||
GenotypeCollection genotypesWithGLs = GenotypeCollection.create(genotypes.size());
|
||||
for ( final Genotype g : genotypes ) {
|
||||
if ( g.hasLikelihoods() && g.getLikelihoods().getAsVector() != null )
|
||||
genotypesWithGLs.add(g);
|
||||
}
|
||||
|
||||
return genotypesWithGLs;
|
||||
}
|
||||
}
|
||||
|
|
@ -281,7 +281,7 @@ public class UnifiedGenotyperEngine {
|
|||
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
|
||||
genotypes.put(GL.getSample(), new Genotype(GL.getSample(), noCall, Genotype.NO_NEG_LOG_10PERROR, null, attributes, false));
|
||||
genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_NEG_LOG_10PERROR, null, attributes, false));
|
||||
}
|
||||
|
||||
GenomeLoc loc = refContext.getLocus();
|
||||
|
|
@ -811,9 +811,6 @@ public class UnifiedGenotyperEngine {
|
|||
case EXACT:
|
||||
afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter);
|
||||
break;
|
||||
case GRID_SEARCH:
|
||||
afcm = new GridSearchAFEstimation(UAC, N, logger, verboseWriter);
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1059,15 +1059,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
GenotypeCollection genotypes = GenotypeCollection.create();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
|
||||
Map<String,Object> attrs = call.makeStatsAttributes(null);
|
||||
|
||||
if ( call.isCall() ) // we made a call - put actual het genotype here:
|
||||
genotypes.put(sample,new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
|
||||
genotypes.add(new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
|
||||
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.put(sample,new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
|
||||
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false));
|
||||
|
||||
}
|
||||
Set<String> filters = null;
|
||||
|
|
@ -1151,11 +1150,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
GenotypeCollection genotypes = GenotypeCollection.create();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
genotypes.put(sample,new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false));
|
||||
genotypes.add(new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false));
|
||||
}
|
||||
|
||||
for ( String sample : tumorSamples ) {
|
||||
genotypes.put(sample,new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) );
|
||||
genotypes.add(new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) );
|
||||
}
|
||||
|
||||
Set<String> filters = null;
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
|||
if (useSingleSample != null) { // only want to output context for one sample
|
||||
Genotype sampGt = vc.getGenotype(useSingleSample);
|
||||
if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.]. Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP!
|
||||
vc = vc.subContextFromGenotypes(sampGt);
|
||||
vc = vc.subContextFromSample(sampGt.getSampleName());
|
||||
else // asked for a sample that this vc does not contain, so ignore this vc:
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -293,7 +293,7 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
|||
if (tracker != null) {
|
||||
VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
|
||||
|
||||
GenotypeCollection genotypeCollection = vc.getGenotypes();
|
||||
GenotypeCollection genotypeCollection = GenotypeCollection.create(vc.getGenotypes().size());
|
||||
|
||||
for (Trio trio : trios) {
|
||||
Genotype mother = vc.getGenotype(trio.getMother());
|
||||
|
|
@ -306,9 +306,7 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
|
|||
Genotype phasedFather = trioGenotypes.get(1);
|
||||
Genotype phasedChild = trioGenotypes.get(2);
|
||||
|
||||
genotypeCollection.put(phasedMother.getSampleName(), phasedMother);
|
||||
genotypeCollection.put(phasedFather.getSampleName(), phasedFather);
|
||||
genotypeCollection.put(phasedChild.getSampleName(), phasedChild);
|
||||
genotypeCollection.add(phasedMother, phasedFather, phasedChild);
|
||||
}
|
||||
|
||||
VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeCollection);
|
||||
|
|
|
|||
|
|
@ -122,7 +122,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
public int MIN_MAPPING_QUALITY_SCORE = 20;
|
||||
|
||||
@Argument(fullName = "sampleToPhase", shortName = "sampleToPhase", doc = "Only include these samples when phasing", required = false)
|
||||
protected List<String> samplesToPhase = null;
|
||||
protected Set
|
||||
<String> samplesToPhase = null;
|
||||
|
||||
private GenomeLoc mostDownstreamLocusReached = null;
|
||||
|
||||
|
|
@ -272,10 +273,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
|
||||
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList(PQ_KEY));
|
||||
|
||||
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
|
||||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||
VariantContext subvc = vc.subContextFromGenotypes(vc.getGenotypes(samplesToPhase).values());
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
|
|
@ -354,9 +355,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
// Perform per-sample phasing:
|
||||
GenotypeCollection sampGenotypes = vc.getGenotypes();
|
||||
Map<String, PhaseCounts> samplePhaseStats = new TreeMap<String, PhaseCounts>();
|
||||
for (Map.Entry<String, Genotype> sampGtEntry : sampGenotypes.entrySet()) {
|
||||
String samp = sampGtEntry.getKey();
|
||||
Genotype gt = sampGtEntry.getValue();
|
||||
for (final Genotype gt : sampGenotypes) {
|
||||
String samp = gt.getSampleName();
|
||||
|
||||
if (DEBUG) logger.debug("sample = " + samp);
|
||||
if (isUnfilteredCalledDiploidGenotype(gt)) {
|
||||
|
|
@ -1134,7 +1134,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
this.start = vc.getStart();
|
||||
this.stop = vc.getEnd();
|
||||
this.alleles = vc.getAlleles();
|
||||
this.genotypes = GenotypeCollection.create(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
|
||||
this.genotypes = GenotypeCollection.copy(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
|
||||
this.negLog10PError = vc.getNegLog10PError();
|
||||
this.filters = vc.filtersWereApplied() ? vc.getFilters() : null;
|
||||
this.attributes = new HashMap<String, Object>(vc.getAttributes());
|
||||
|
|
@ -1153,7 +1153,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
|
||||
public void setGenotype(String sample, Genotype newGt) {
|
||||
genotypes.put(sample, newGt);
|
||||
genotypes.add(newGt);
|
||||
}
|
||||
|
||||
public void setPhasingInconsistent() {
|
||||
|
|
|
|||
|
|
@ -157,8 +157,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
// A C A
|
||||
// A C C
|
||||
|
||||
for (Genotype g : vc1.getGenotypes().values()) {
|
||||
String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null;
|
||||
for (final Genotype g : vc1.getGenotypes()) {
|
||||
final String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null;
|
||||
|
||||
switch (g.getType()) {
|
||||
case NO_CALL:
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ public class G1KPhaseITable extends VariantEvaluator {
|
|||
}
|
||||
|
||||
// count variants per sample
|
||||
for (final Genotype g : eval.getGenotypes().values()) {
|
||||
for (final Genotype g : eval.getGenotypes()) {
|
||||
if ( ! g.isNoCall() && ! g.isHomRef() ) {
|
||||
int count = countsPerSample.get(g.getSampleName()).get(eval.getType());
|
||||
countsPerSample.get(g.getSampleName()).put(eval.getType(), count + 1);
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
//public GenotypeConcordance(VariantEvalWalker parent) {
|
||||
// super(parent);
|
||||
// discordantInteresting = parent.DISCORDANT_INTERESTING;
|
||||
// discordantInteresting = parent.DISCORDANT_INTERESTING;
|
||||
//}
|
||||
|
||||
public String getName() {
|
||||
|
|
@ -277,8 +277,9 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
// determine concordance for eval data
|
||||
if (eval != null) {
|
||||
for (final String sample : eval.getGenotypes().keySet()) {
|
||||
final Genotype.Type called = eval.getGenotype(sample).getType();
|
||||
for (final Genotype g : eval.getGenotypes() ) {
|
||||
final String sample = g.getSampleName();
|
||||
final Genotype.Type called = g.getType();
|
||||
final Genotype.Type truth;
|
||||
|
||||
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
|
||||
|
|
@ -299,9 +300,9 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
else {
|
||||
final Genotype.Type called = Genotype.Type.NO_CALL;
|
||||
|
||||
for (final String sample : validation.getGenotypes().keySet()) {
|
||||
final Genotype.Type truth = validation.getGenotype(sample).getType();
|
||||
detailedStats.incrValue(sample, truth, called);
|
||||
for (final Genotype g : validation.getGenotypes()) {
|
||||
final Genotype.Type truth = g.getType();
|
||||
detailedStats.incrValue(g.getSampleName(), truth, called);
|
||||
|
||||
// print out interesting sites
|
||||
/*
|
||||
|
|
@ -410,8 +411,8 @@ class SampleStats implements TableType {
|
|||
|
||||
public SampleStats(VariantContext vc, int nGenotypeTypes) {
|
||||
this.nGenotypeTypes = nGenotypeTypes;
|
||||
for (String sample : vc.getGenotypes().keySet())
|
||||
concordanceStats.put(sample, new long[nGenotypeTypes][nGenotypeTypes]);
|
||||
for (final Genotype g : vc.getGenotypes())
|
||||
concordanceStats.put(g.getSampleName(), new long[nGenotypeTypes][nGenotypeTypes]);
|
||||
}
|
||||
|
||||
public SampleStats(int genotypeTypes) {
|
||||
|
|
@ -511,8 +512,8 @@ class SampleSummaryStats implements TableType {
|
|||
|
||||
public SampleSummaryStats(final VariantContext vc) {
|
||||
concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
|
||||
for( final String sample : vc.getGenotypes().keySet() ) {
|
||||
concordanceSummary.put(sample, new double[COLUMN_KEYS.length]);
|
||||
for( final Genotype g : vc.getGenotypes() ) {
|
||||
concordanceSummary.put(g.getSampleName(), new double[COLUMN_KEYS.length]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
|||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
for (final Genotype genotype : vc.getGenotypes()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
|
|
|
|||
|
|
@ -266,7 +266,7 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested sample
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
|
||||
return getSubsetOfVariantContext(vc, Arrays.asList(sampleName));
|
||||
return getSubsetOfVariantContext(vc, new HashSet<String>(Arrays.asList(sampleName)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -276,7 +276,7 @@ public class VariantEvalUtils {
|
|||
* @param sampleNames the samples to pull out of the VariantContext
|
||||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles());
|
||||
|
||||
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
|
||||
|
|
|
|||
|
|
@ -212,15 +212,15 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
// create new Genotype objects
|
||||
GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples());
|
||||
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
List<Allele> newAlleles = new ArrayList<Allele>();
|
||||
for ( Allele allele : genotype.getValue().getAlleles() ) {
|
||||
for ( Allele allele : genotype.getAlleles() ) {
|
||||
Allele newA = alleleMap.get(allele);
|
||||
if ( newA == null )
|
||||
newA = Allele.NO_CALL;
|
||||
newAlleles.add(newA);
|
||||
}
|
||||
newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles));
|
||||
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
|
||||
}
|
||||
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel);
|
||||
|
|
|
|||
|
|
@ -557,7 +557,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
// check if we find it in the variant rod
|
||||
GenotypeCollection genotypes = vc.getGenotypes(samples);
|
||||
for (Genotype g : genotypes.values()) {
|
||||
for (final Genotype g : genotypes) {
|
||||
if (sampleHasVariant(g)) {
|
||||
// There is a variant called (or filtered with not exclude filtered option set) that is not HomRef for at least one of the samples.
|
||||
if (compVCs == null)
|
||||
|
|
|
|||
|
|
@ -130,8 +130,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
// set the appropriate sample name if necessary
|
||||
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
|
||||
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
|
||||
GenotypeCollection genotypes = GenotypeCollection.create(1);
|
||||
genotypes.put(sampleName, g);
|
||||
GenotypeCollection genotypes = GenotypeCollection.create(g);
|
||||
vc = VariantContext.modifyGenotypes(vc, genotypes);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -451,7 +451,7 @@ public class StandardVCFWriter extends IndexingVCFWriter {
|
|||
boolean sawGoodGT = false;
|
||||
boolean sawGoodQual = false;
|
||||
boolean sawGenotypeFilter = false;
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.isAvailable() )
|
||||
sawGoodGT = true;
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.put(sampleName, new Genotype(sampleName,
|
||||
genotypes.add(new Genotype(sampleName,
|
||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
|
|
|
|||
|
|
@ -209,13 +209,7 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.put(sampleName,
|
||||
new Genotype(sampleName,
|
||||
GTalleles,
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
genotypes.add(new Genotype(sampleName, GTalleles, GTQual, genotypeFilters, gtAttributes, phased));
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -155,12 +155,12 @@ public class GCF {
|
|||
if ( genotypes.isEmpty() )
|
||||
return VariantContext.NO_GENOTYPES;
|
||||
else {
|
||||
GenotypeCollection map = GenotypeCollection.create();
|
||||
GenotypeCollection map = GenotypeCollection.create(genotypes.size());
|
||||
|
||||
for ( int i = 0; i < genotypes.size(); i++ ) {
|
||||
final String sampleName = header.getSample(i);
|
||||
final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap);
|
||||
map.put(sampleName, g);
|
||||
map.add(g);
|
||||
}
|
||||
|
||||
return map;
|
||||
|
|
@ -173,7 +173,7 @@ public class GCF {
|
|||
List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes);
|
||||
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
|
||||
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
for ( Genotype g : vc.getGenotypes() ) {
|
||||
int i = GCFHeaderBuilder.encodeSample(g.getSampleName());
|
||||
genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,20 +67,28 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
}
|
||||
|
||||
public static final GenotypeCollection create(final int nGenotypes) {
|
||||
return new GenotypeCollection(nGenotypes, true);
|
||||
return new GenotypeCollection(nGenotypes, false);
|
||||
}
|
||||
|
||||
// todo -- differentiate between empty constructor and copy constructor
|
||||
// todo -- create constructor (Genotype ... genotypes)
|
||||
|
||||
public static final GenotypeCollection create(final ArrayList<Genotype> genotypes) {
|
||||
return new GenotypeCollection(genotypes, true);
|
||||
return new GenotypeCollection(genotypes, false);
|
||||
}
|
||||
|
||||
public static final GenotypeCollection create(final Genotype... genotypes) {
|
||||
return new GenotypeCollection(new ArrayList<Genotype>(Arrays.asList(genotypes)), false);
|
||||
}
|
||||
|
||||
public static final GenotypeCollection copy(final GenotypeCollection toCopy) {
|
||||
return create(toCopy.genotypes);
|
||||
}
|
||||
|
||||
public static final GenotypeCollection copy(final Collection<Genotype> toCopy) {
|
||||
return create(new ArrayList<Genotype>(toCopy));
|
||||
}
|
||||
|
||||
// public static final GenotypeMap create(final Collection<Genotype> genotypes) {
|
||||
// if ( genotypes == null )
|
||||
// return null; // todo -- really should return an empty map
|
||||
|
|
@ -173,6 +181,12 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
return genotypes.add(genotype);
|
||||
}
|
||||
|
||||
public boolean add(final Genotype ... genotype) {
|
||||
checkImmutability();
|
||||
invalidateCaches();
|
||||
return genotypes.addAll(Arrays.asList(genotype));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(final int i, final Genotype genotype) {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
@ -291,7 +305,7 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
return genotypes.toArray(ts);
|
||||
}
|
||||
|
||||
public Iterable<Genotype> iterateInOrder(final Iterable<String> sampleNamesInOrder) {
|
||||
public Iterable<Genotype> iterateInSampleNameOrder(final Iterable<String> sampleNamesInOrder) {
|
||||
return new Iterable<Genotype>() {
|
||||
@Override
|
||||
public Iterator<Genotype> iterator() {
|
||||
|
|
@ -300,6 +314,10 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
};
|
||||
}
|
||||
|
||||
public Iterable<Genotype> iterateInSampleNameOrder() {
|
||||
return iterateInSampleNameOrder(getSampleNamesOrderedByName());
|
||||
}
|
||||
|
||||
private final class InOrderIterator implements Iterator<Genotype> {
|
||||
final Iterator<String> sampleNamesInOrder;
|
||||
|
||||
|
|
@ -322,4 +340,41 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
public Set<String> getSampleNames() {
|
||||
buildCache();
|
||||
return sampleNameToOffset.keySet();
|
||||
}
|
||||
|
||||
public Set<String> getSampleNamesOrderedByName() {
|
||||
return new TreeSet<String>(getSampleNames());
|
||||
}
|
||||
|
||||
public boolean containsSample(final String sample) {
|
||||
buildCache();
|
||||
return sampleNameToOffset.containsKey(sample);
|
||||
}
|
||||
|
||||
public boolean containsSamples(final Collection<String> samples) {
|
||||
buildCache();
|
||||
return getSampleNames().containsAll(samples);
|
||||
}
|
||||
|
||||
public GenotypeCollection subsetToSamples( final Collection<String> samples ) {
|
||||
return subsetToSamples(new HashSet<String>(samples));
|
||||
}
|
||||
|
||||
public GenotypeCollection subsetToSamples( final Set<String> samples ) {
|
||||
if ( samples.size() == genotypes.size() )
|
||||
return this;
|
||||
else if ( samples.isEmpty() )
|
||||
return NO_GENOTYPES;
|
||||
else {
|
||||
GenotypeCollection subset = create(samples.size());
|
||||
for ( final Genotype g : genotypes )
|
||||
if ( samples.contains(g.getSampleName()) )
|
||||
subset.add(g);
|
||||
return subset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -279,7 +280,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
|
||||
this(source, contig, start, stop, alleles,
|
||||
GenotypeCollection.create(genotypes),
|
||||
GenotypeCollection.copy(genotypes),
|
||||
negLog10PError, filters, attributes, null, false);
|
||||
}
|
||||
|
||||
|
|
@ -423,58 +424,73 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype
|
||||
* genotype and alleles in genotype. This is the right way to test if a single genotype is actually
|
||||
* variant or not.
|
||||
*
|
||||
* @param genotype genotype
|
||||
* @return vc subcontext
|
||||
*/
|
||||
public VariantContext subContextFromGenotypes(Genotype genotype) {
|
||||
return subContextFromGenotypes(Arrays.asList(genotype));
|
||||
}
|
||||
// /**
|
||||
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype
|
||||
// * genotype and alleles in genotype. This is the right way to test if a single genotype is actually
|
||||
// * variant or not.
|
||||
// *
|
||||
// * @param genotype genotype
|
||||
// * @return vc subcontext
|
||||
// * @deprecated replaced by {@link #subContextFromSample(String)}
|
||||
// */
|
||||
// public VariantContext subContextFromGenotypes(Genotype genotype) {
|
||||
// return subContextFromGenotypes(Arrays.asList(genotype));
|
||||
// }
|
||||
//
|
||||
//
|
||||
// /**
|
||||
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
|
||||
// * genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
|
||||
// * variant or not.
|
||||
// *
|
||||
// * @param genotypes genotypes
|
||||
// * @return vc subcontext
|
||||
// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection)}
|
||||
// */
|
||||
// public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
|
||||
// return subContextFromGenotypes(genotypes, allelesOfGenotypes(genotypes)) ;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
|
||||
// * genotypes. Also, the resulting variant context will contain the alleles provided, not only those found in genotypes
|
||||
// *
|
||||
// * @param genotypes genotypes
|
||||
// * @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more
|
||||
// * @return vc subcontext
|
||||
// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection, java.util.Collection)}
|
||||
// */
|
||||
// @Deprecated
|
||||
// public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Collection<Allele> alleles) {
|
||||
// return new VariantContext(getSource(), contig, start, stop, alleles,
|
||||
// GenotypeCollection.create(genotypes),
|
||||
// getNegLog10PError(),
|
||||
// filtersWereApplied() ? getFilters() : null,
|
||||
// getAttributes(),
|
||||
// getReferenceBaseForIndel());
|
||||
// }
|
||||
|
||||
|
||||
/**
|
||||
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
|
||||
* genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
|
||||
* variant or not.
|
||||
*
|
||||
* @param genotypes genotypes
|
||||
* @return vc subcontext
|
||||
*/
|
||||
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
|
||||
return subContextFromGenotypes(genotypes, allelesOfGenotypes(genotypes)) ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
|
||||
* genotypes. Also, the resulting variant context will contain the alleles provided, not only those found in genotypes
|
||||
*
|
||||
* @param genotypes genotypes
|
||||
* @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more
|
||||
* @return vc subcontext
|
||||
*/
|
||||
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Collection<Allele> alleles) {
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
|
||||
return new VariantContext(getSource(), contig, start, stop, alleles,
|
||||
GenotypeCollection.create(genotypes),
|
||||
genotypes.subsetToSamples(sampleNames),
|
||||
getNegLog10PError(),
|
||||
filtersWereApplied() ? getFilters() : null,
|
||||
getAttributes(),
|
||||
getReferenceBaseForIndel());
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSamples(Collection<String> sampleNames, Collection<Allele> alleles) {
|
||||
return subContextFromGenotypes(getGenotypes(sampleNames).values(), alleles);
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSamples(Collection<String> sampleNames) {
|
||||
return subContextFromGenotypes(getGenotypes(sampleNames).values());
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames) {
|
||||
GenotypeCollection newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||
return new VariantContext(getSource(), contig, start, stop, allelesOfGenotypes(newGenotypes),
|
||||
newGenotypes,
|
||||
getNegLog10PError(),
|
||||
filtersWereApplied() ? getFilters() : null,
|
||||
getAttributes(),
|
||||
getReferenceBaseForIndel());
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSample(String sampleName) {
|
||||
return subContextFromGenotypes(getGenotype(sampleName));
|
||||
return subContextFromSamples(new HashSet<String>(Arrays.asList(sampleName)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -875,16 +891,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
*/
|
||||
public boolean hasGenotypes() {
|
||||
loadGenotypes();
|
||||
return genotypes.size() > 0;
|
||||
return ! genotypes.isEmpty();
|
||||
}
|
||||
|
||||
public boolean hasGenotypes(Collection<String> sampleNames) {
|
||||
loadGenotypes();
|
||||
for ( String name : sampleNames ) {
|
||||
if ( ! genotypes.containsKey(name) )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return genotypes.containsSamples(sampleNames);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -895,10 +907,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return genotypes;
|
||||
}
|
||||
|
||||
public List<Genotype> getGenotypesSortedByName() {
|
||||
public Iterable<Genotype> getGenotypesSortedByName() {
|
||||
loadGenotypes();
|
||||
Collection<Genotype> types = new TreeMap<String,Genotype>(genotypes).values();
|
||||
return new ArrayList<Genotype>(types);
|
||||
return genotypes.iterateInSampleNameOrder();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -922,24 +933,23 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
||||
*/
|
||||
public GenotypeCollection getGenotypes(Collection<String> sampleNames) {
|
||||
GenotypeCollection map = GenotypeCollection.create(sampleNames.size());
|
||||
|
||||
for ( String name : sampleNames ) {
|
||||
if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames);
|
||||
final Genotype g = getGenotype(name);
|
||||
if ( g != null ) {
|
||||
map.put(name, g);
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
return getGenotypes().subsetToSamples(sampleNames);
|
||||
}
|
||||
|
||||
public GenotypeCollection getGenotypes(Set<String> sampleNames) {
|
||||
return getGenotypes().subsetToSamples(sampleNames);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the set of all sample names in this context
|
||||
* @return the set of all sample names in this context, not ordered
|
||||
*/
|
||||
public Set<String> getSampleNames() {
|
||||
return getGenotypes().keySet();
|
||||
return getGenotypes().getSampleNames();
|
||||
}
|
||||
|
||||
public Set<String> getSampleNamesOrderedByName() {
|
||||
return getGenotypes().getSampleNamesOrderedByName();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -952,11 +962,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
public boolean hasGenotype(String sample) {
|
||||
return getGenotypes().containsKey(sample);
|
||||
return getGenotypes().containsSample(sample);
|
||||
}
|
||||
|
||||
public Genotype getGenotype(int ith) {
|
||||
return getGenotypesSortedByName().get(ith);
|
||||
return genotypes.get(ith);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -968,7 +978,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
public int getChromosomeCount() {
|
||||
int n = 0;
|
||||
|
||||
for ( Genotype g : getGenotypes().values() ) {
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
n += g.isNoCall() ? 0 : g.getPloidy();
|
||||
}
|
||||
|
||||
|
|
@ -984,7 +994,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
public int getChromosomeCount(Allele a) {
|
||||
int n = 0;
|
||||
|
||||
for ( Genotype g : getGenotypes().values() ) {
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
n += g.getAlleles(a).size();
|
||||
}
|
||||
|
||||
|
|
@ -1015,7 +1025,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
if ( genotypeCounts == null ) {
|
||||
genotypeCounts = new int[Genotype.Type.values().length];
|
||||
|
||||
for ( Genotype g : getGenotypes().values() ) {
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
if ( g.isNoCall() )
|
||||
genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++;
|
||||
else if ( g.isHomRef() )
|
||||
|
|
@ -1136,7 +1146,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
List<Allele> reportedAlleles = getAlleles();
|
||||
Set<Allele> observedAlleles = new HashSet<Allele>();
|
||||
observedAlleles.add(getReference());
|
||||
for ( Genotype g : getGenotypes().values() ) {
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
if ( g.isCalled() )
|
||||
observedAlleles.addAll(g.getAlleles());
|
||||
}
|
||||
|
|
@ -1285,12 +1295,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
private void validateGenotypes() {
|
||||
if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null");
|
||||
|
||||
for ( Map.Entry<String, Genotype> elt : this.genotypes.entrySet() ) {
|
||||
String name = elt.getKey();
|
||||
Genotype g = elt.getValue();
|
||||
|
||||
if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName());
|
||||
|
||||
for ( final Genotype g : this.genotypes ) {
|
||||
if ( g.isAvailable() ) {
|
||||
for ( Allele gAllele : g.getAlleles() ) {
|
||||
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
|
||||
|
|
@ -1465,8 +1470,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
Byte refByte = inputVC.getReferenceBaseForIndel();
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
GenotypeCollection genotypes = GenotypeCollection.create();
|
||||
GenotypeCollection inputGenotypes = inputVC.getGenotypes();
|
||||
|
||||
for (Allele a : inputVC.getAlleles()) {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
|
|
@ -1483,11 +1486,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for (String sample : inputVC.getSampleNames()) {
|
||||
Genotype g = inputGenotypes.get(sample);
|
||||
|
||||
GenotypeCollection genotypes = GenotypeCollection.create(inputVC.getNSamples());
|
||||
for (final Genotype g : inputVC.getGenotypes() ) {
|
||||
List<Allele> inAlleles = g.getAlleles();
|
||||
List<Allele> newGenotypeAlleles = new ArrayList<Allele>();
|
||||
List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
||||
for (Allele a : inAlleles) {
|
||||
if (a.isCalled()) {
|
||||
if (a.isSymbolic()) {
|
||||
|
|
@ -1506,8 +1508,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
newGenotypeAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
}
|
||||
genotypes.put(sample, new Genotype(sample, newGenotypeAlleles, g.getNegLog10PError(),
|
||||
g.getFilters(),g.getAttributes(),g.isPhased()));
|
||||
genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getNegLog10PError(),
|
||||
g.getFilters(), g.getAttributes(), g.isPhased()));
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -1520,48 +1522,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
}
|
||||
|
||||
public ArrayList<Allele> getTwoAllelesWithHighestAlleleCounts() {
|
||||
// first idea: get two alleles with highest AC
|
||||
int maxAC1 = 0, maxAC2=0,maxAC1ind =0, maxAC2ind = 0;
|
||||
int i=0;
|
||||
int[] alleleCounts = new int[this.getAlleles().size()];
|
||||
ArrayList<Allele> alleleArray = new ArrayList<Allele>();
|
||||
for (Allele a:this.getAlleles()) {
|
||||
int ac = this.getChromosomeCount(a);
|
||||
if (ac >=maxAC1) {
|
||||
maxAC1 = ac;
|
||||
maxAC1ind = i;
|
||||
}
|
||||
alleleArray.add(a);
|
||||
alleleCounts[i++] = ac;
|
||||
}
|
||||
// now get second best allele
|
||||
for (i=0; i < alleleCounts.length; i++) {
|
||||
if (i == maxAC1ind)
|
||||
continue;
|
||||
if (alleleCounts[i] >= maxAC2) {
|
||||
maxAC2 = alleleCounts[i];
|
||||
maxAC2ind = i;
|
||||
}
|
||||
}
|
||||
|
||||
Allele alleleA, alleleB;
|
||||
if (alleleArray.get(maxAC1ind).isReference()) {
|
||||
alleleA = alleleArray.get(maxAC1ind);
|
||||
alleleB = alleleArray.get(maxAC2ind);
|
||||
}
|
||||
else if (alleleArray.get(maxAC2ind).isReference()) {
|
||||
alleleA = alleleArray.get(maxAC2ind);
|
||||
alleleB = alleleArray.get(maxAC1ind);
|
||||
} else {
|
||||
alleleA = alleleArray.get(maxAC1ind);
|
||||
alleleB = alleleArray.get(maxAC2ind);
|
||||
}
|
||||
ArrayList<Allele> a = new ArrayList<Allele>();
|
||||
a.add(alleleA);
|
||||
a.add(alleleB);
|
||||
return a;
|
||||
}
|
||||
public Allele getAltAlleleWithHighestAlleleCount() {
|
||||
// first idea: get two alleles with highest AC
|
||||
Allele best = null;
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class VariantContextUtils {
|
|||
* @return VariantContext object
|
||||
*/
|
||||
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
|
||||
return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, GenotypeCollection.create(genotypes), negLog10PError, filters, attributes);
|
||||
return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, GenotypeCollection.copy(genotypes), negLog10PError, filters, attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -351,10 +351,9 @@ public class VariantContextUtils {
|
|||
|
||||
// Genotypes
|
||||
final GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes().values() ) {
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
|
||||
genotypes.put(g.getSampleName(),
|
||||
new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(),
|
||||
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(),
|
||||
genotypeAttributes, g.isPhased()));
|
||||
}
|
||||
|
||||
|
|
@ -682,9 +681,9 @@ public class VariantContextUtils {
|
|||
if (!hasNullAlleles)
|
||||
return inputVC;
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( Map.Entry<String, Genotype> sample : inputVC.getGenotypes().entrySet() ) {
|
||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||
|
||||
List<Allele> originalAlleles = sample.getValue().getAlleles();
|
||||
List<Allele> originalAlleles = genotype.getAlleles();
|
||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
|
|
@ -692,7 +691,7 @@ public class VariantContextUtils {
|
|||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles));
|
||||
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
|
||||
|
||||
}
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0]));
|
||||
|
|
@ -705,8 +704,8 @@ public class VariantContextUtils {
|
|||
public static GenotypeCollection stripPLs(GenotypeCollection genotypes) {
|
||||
GenotypeCollection newGs = GenotypeCollection.create(genotypes.size());
|
||||
|
||||
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
|
||||
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue());
|
||||
for ( final Genotype g : genotypes ) {
|
||||
newGs.add(g.hasLikelihoods() ? removePLs(g) : g);
|
||||
}
|
||||
|
||||
return newGs;
|
||||
|
|
@ -884,9 +883,9 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
private static void mergeGenotypes(GenotypeCollection mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
|
||||
for ( Genotype g : oneVC.getGenotypes().values() ) {
|
||||
for ( Genotype g : oneVC.getGenotypes() ) {
|
||||
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
|
||||
if ( ! mergedGenotypes.containsKey(name) ) {
|
||||
if ( ! mergedGenotypes.containsSample(name) ) {
|
||||
// only add if the name is new
|
||||
Genotype newG = g;
|
||||
|
||||
|
|
@ -895,7 +894,7 @@ public class VariantContextUtils {
|
|||
newG = new Genotype(name, alleles, g.getNegLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased());
|
||||
}
|
||||
|
||||
mergedGenotypes.put(name, newG);
|
||||
mergedGenotypes.add(newG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -924,15 +923,15 @@ public class VariantContextUtils {
|
|||
|
||||
// create new Genotype objects
|
||||
GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples());
|
||||
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
List<Allele> newAlleles = new ArrayList<Allele>();
|
||||
for ( Allele allele : genotype.getValue().getAlleles() ) {
|
||||
for ( Allele allele : genotype.getAlleles() ) {
|
||||
Allele newAllele = alleleMap.get(allele);
|
||||
if ( newAllele == null )
|
||||
newAllele = Allele.NO_CALL;
|
||||
newAlleles.add(newAllele);
|
||||
}
|
||||
newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles));
|
||||
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
|
||||
}
|
||||
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
|
||||
|
|
@ -944,13 +943,13 @@ public class VariantContextUtils {
|
|||
return vc;
|
||||
|
||||
GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples());
|
||||
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||
for ( Map.Entry<String, Object> attr : genotype.getValue().getAttributes().entrySet() ) {
|
||||
for ( Map.Entry<String, Object> attr : genotype.getAttributes().entrySet() ) {
|
||||
if ( allowedAttributes.contains(attr.getKey()) )
|
||||
attrs.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
newGenotypes.put(genotype.getKey(), Genotype.modifyAttributes(genotype.getValue(), attrs));
|
||||
newGenotypes.add(Genotype.modifyAttributes(genotype, attrs));
|
||||
}
|
||||
|
||||
return VariantContext.modifyGenotypes(vc, newGenotypes);
|
||||
|
|
@ -1023,10 +1022,8 @@ public class VariantContextUtils {
|
|||
MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added
|
||||
|
||||
GenotypeCollection mergedGenotypes = GenotypeCollection.create();
|
||||
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) {
|
||||
String sample = gt1Entry.getKey();
|
||||
Genotype gt1 = gt1Entry.getValue();
|
||||
Genotype gt2 = vc2.getGenotype(sample);
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
|
@ -1052,8 +1049,8 @@ public class VariantContextUtils {
|
|||
if (phaseQual.PQ != null)
|
||||
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
||||
|
||||
Genotype mergedGt = new Genotype(sample, mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||
mergedGenotypes.put(sample, mergedGt);
|
||||
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||
mergedGenotypes.add(mergedGt);
|
||||
}
|
||||
|
||||
String mergedName = VariantContextUtils.mergeVariantContextNames(vc1.getSource(), vc2.getSource());
|
||||
|
|
@ -1197,8 +1194,7 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
private static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) {
|
||||
for (Map.Entry<String, Genotype> gtEntry : vc.getGenotypes().entrySet()) {
|
||||
Genotype gt = gtEntry.getValue();
|
||||
for (final Genotype gt : vc.getGenotypes()) {
|
||||
if (gt.isNoCall() || gt.isFiltered())
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1210,10 +1206,8 @@ public class VariantContextUtils {
|
|||
|
||||
private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
|
||||
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) {
|
||||
String sample = gt1Entry.getKey();
|
||||
Genotype gt1 = gt1Entry.getValue();
|
||||
Genotype gt2 = vc2.getGenotype(sample);
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
|
||||
return false;
|
||||
|
|
@ -1275,10 +1269,8 @@ public class VariantContextUtils {
|
|||
*/
|
||||
|
||||
public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) {
|
||||
String sample = gt1Entry.getKey();
|
||||
Genotype gt1 = gt1Entry.getValue();
|
||||
Genotype gt2 = vc2.getGenotype(sample);
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
|
@ -1309,10 +1301,8 @@ public class VariantContextUtils {
|
|||
allele2ToAllele1.put(vc2.getReference(), vc1.getReference());
|
||||
|
||||
// Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples).
|
||||
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) {
|
||||
String sample = gt1Entry.getKey();
|
||||
Genotype gt1 = gt1Entry.getValue();
|
||||
Genotype gt2 = vc2.getGenotype(sample);
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
|
|
|||
|
|
@ -37,13 +37,14 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
|||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Test routine for new VariantContext object
|
||||
|
|
@ -93,7 +94,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( writer != null && n == 0 ) {
|
||||
if ( ! wroteHeader ) {
|
||||
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));
|
||||
writer.writeHeader(createVCFHeader(vc));
|
||||
wroteHeader = true;
|
||||
}
|
||||
|
||||
|
|
@ -115,6 +116,10 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private static VCFHeader createVCFHeader(VariantContext vc) {
|
||||
return new VCFHeader(new HashSet<VCFHeaderLine>(), vc.getGenotypes().getSampleNamesSorted());
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Set<String> filters = null;
|
||||
Map<String, Object> attributes = new HashMap<String,Object>();
|
||||
GenotypeCollection genotypes = GenotypeCollection.create();
|
||||
GenotypeCollection genotypes = GenotypeCollection.create(header.getGenotypeSamples().size());
|
||||
|
||||
alleles.add(Allele.create("-",true));
|
||||
alleles.add(Allele.create("CC",false));
|
||||
|
|
@ -133,7 +133,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
gtattributes.put("BB","1");
|
||||
Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true);
|
||||
|
||||
genotypes.put(name,gt);
|
||||
genotypes.add(gt);
|
||||
|
||||
}
|
||||
return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A');
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
int start = 10;
|
||||
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
||||
return new VariantContext(source, "1", start, stop, alleles,
|
||||
GenotypeCollection.create(genotypes), 1.0, filters, null, Cref.getBases()[0]);
|
||||
GenotypeCollection.copy(genotypes), 1.0, filters, null, Cref.getBases()[0]);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
@ -509,7 +509,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
// necessary to not overload equals for genotypes
|
||||
private void assertGenotypesAreMostlyEqual(Map<String, Genotype> actual, Map<String, Genotype> expected) {
|
||||
private void assertGenotypesAreMostlyEqual(GenotypeCollection actual, GenotypeCollection expected) {
|
||||
if (actual == expected) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -522,10 +522,8 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
|
||||
}
|
||||
|
||||
for (Map.Entry<String, Genotype> entry : actual.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
Genotype value = entry.getValue();
|
||||
Genotype expectedValue = expected.get(key);
|
||||
for (Genotype value : actual) {
|
||||
Genotype expectedValue = expected.get(value.getSampleName());
|
||||
|
||||
Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal");
|
||||
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError(), "GQ values aren't equal");
|
||||
|
|
@ -545,7 +543,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
|
||||
|
||||
// test genotypes
|
||||
Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
|
||||
Assert.assertEquals(merged.getSampleNames(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
|
|
|
|||
Loading…
Reference in New Issue