diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 523da7492..164e1f1cb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -202,15 +202,6 @@ public class VariantContextAdaptors { } } - public static VCFHeader createVCFHeader(Set hInfo, VariantContext vc) { - HashSet names = new LinkedHashSet(); - for ( Genotype g : vc.getGenotypesSortedByName() ) { - names.add(g.getSampleName()); - } - - return new VCFHeader(hInfo == null ? new HashSet() : hInfo, names); - } - // -------------------------------------------------------------------------------------------------------------- // // GELI to VariantContext @@ -353,7 +344,7 @@ public class VariantContextAdaptors { } Genotype g = new Genotype(samples[i], myAlleles); - genotypes.put(samples[i], g); + genotypes.add(g); } HashMap attrs = new HashMap(1); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 94b0636f4..551f8e2cf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -89,9 +89,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage(); if (haplotypes != null) { - final Set> genotypes = vc.getGenotypes().entrySet(); - for ( final Map.Entry genotype : genotypes ) { - final AlignmentContext thisContext = stratifiedContexts.get(genotype.getKey()); + for ( final Genotype genotype : vc.getGenotypes()) { + final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName()); if ( thisContext != null ) { final ReadBackedPileup thisPileup; if (thisContext.hasExtendedEventPileup()) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index a21d7106c..917a75294 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -52,8 +52,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno double hetCount = 0.0; double homCount = 0.0; int N = 0; // number of samples that have likelihoods - for ( final Map.Entry genotypeMap : genotypes.entrySet() ) { - Genotype g = genotypeMap.getValue(); + for ( final Genotype g : genotypes ) { if ( g.isNoCall() || !g.hasLikelihoods() ) continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index dae041155..3a1f2cc87 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -35,13 +35,13 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati int depth = 0; - for ( Map.Entry genotype : genotypes.entrySet() ) { + for ( final Genotype genotype : genotypes ) { // we care only about variant calls with likelihoods - if ( genotype.getValue().isHomRef() ) + if ( genotype.isHomRef() ) continue; - AlignmentContext context = stratifiedContexts.get(genotype.getKey()); + AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 8182747f4..97e014373 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -43,8 +43,8 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar if (vc.isSNP() && vc.isBiallelic()) { // todo - no current support for multiallelic snps - for ( final Map.Entry genotype : genotypes.entrySet() ) { - final AlignmentContext context = stratifiedContexts.get(genotype.getKey()); + for ( final Genotype genotype : genotypes ) { + final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) { continue; } @@ -53,8 +53,8 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar } else if (vc.isIndel() || vc.isMixed()) { - for ( final Map.Entry genotype : genotypes.entrySet() ) { - final AlignmentContext context = stratifiedContexts.get(genotype.getKey()); + for ( final Genotype genotype : genotypes ) { + final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) { continue; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 6f481b872..a0bd69be7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -229,11 +229,10 @@ public class VariantAnnotatorEngine { return vc.getGenotypes(); GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples()); - for ( Map.Entry g : vc.getGenotypes().entrySet() ) { - Genotype genotype = g.getValue(); - AlignmentContext context = stratifiedContexts.get(g.getKey()); + for ( final Genotype genotype : vc.getGenotypes() ) { + AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) { - genotypes.put(g.getKey(), genotype); + genotypes.add(genotype); continue; } @@ -243,7 +242,7 @@ public class VariantAnnotatorEngine { if ( result != null ) genotypeAnnotations.putAll(result); } - genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased())); + genotypes.add(new Genotype(genotype.getSampleName(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased())); } return genotypes; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 89dd114cc..c03621280 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -202,9 +202,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { hapmapGenotypes = vc_comp.getGenotypes(); } - for ( Map.Entry originalGenotypes : vc_input.getGenotypes().entrySet() ) { - - Genotype g = originalGenotypes.getValue(); + for ( final Genotype g : vc_input.getGenotypes() ) { Set filters = new LinkedHashSet(g.getFilters()); boolean genotypeIsPhased = true; @@ -214,7 +212,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { // use sample as key into genotypes structure if (vc_comp != null) { - if (vc_input.getGenotypes().containsKey(sample) && hapmapGenotypes.containsKey(sample)) { + if (vc_input.getGenotypes().containsSample(sample) && hapmapGenotypes.containsSample(sample)) { Genotype hapmapGenotype = hapmapGenotypes.get(sample); if (hapmapGenotype.isCalled()){ @@ -325,13 +323,12 @@ public class BeagleOutputToVCFWalker extends RodWalker { else { originalAttributes.put("OG","."); } - Genotype imputedGenotype = new Genotype(originalGenotypes.getKey(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased); + Genotype imputedGenotype = new Genotype(g.getSampleName(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased); if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) { beagleVarCounts++; } - genotypes.put(originalGenotypes.getKey(), imputedGenotype); - + genotypes.add(imputedGenotype); } VariantContext filteredVC; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index b722220f9..f7a84ee08 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -39,10 +39,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.File; import java.io.PrintStream; @@ -245,18 +242,18 @@ public class ProduceBeagleInputWalker extends RodWalker { } if ( markers != null ) markers.append("\n"); - Map preferredGenotypes = preferredVC.getGenotypes(); - Map otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; + GenotypeCollection preferredGenotypes = preferredVC.getGenotypes(); + GenotypeCollection otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; for ( String sample : samples ) { boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE; Genotype genotype; boolean isValidation; // use sample as key into genotypes structure - if ( preferredGenotypes.keySet().contains(sample) ) { + if ( preferredGenotypes.containsSample(sample) ) { genotype = preferredGenotypes.get(sample); isValidation = isValidationSite; - } else if ( otherGenotypes != null && otherGenotypes.keySet().contains(sample) ) { + } else if ( otherGenotypes != null && otherGenotypes.containsSample(sample) ) { genotype = otherGenotypes.get(sample); isValidation = ! isValidationSite; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index a447d17af..2587b30ef 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -102,7 +102,7 @@ public class VCFDiffableReader implements DiffableReader { vcRoot.add(attribute.getKey(), attribute.getValue()); } - for (Genotype g : vc.getGenotypes().values() ) { + for (Genotype g : vc.getGenotypes() ) { DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot); gRoot.add("GT", g.getGenotypeString()); gRoot.add("GQ", g.hasNegLog10PError() ? g.getNegLog10PError() * 10 : VCFConstants.MISSING_VALUE_v4 ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index c1fbc9ac6..81bff7aaa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -290,10 +290,7 @@ public class VariantFiltrationWalker extends RodWalker { genotypes = GenotypeCollection.create(vc.getGenotypes().size()); // for each genotype, check filters then create a new object - for ( Map.Entry genotype : vc.getGenotypes().entrySet() ) { - - Genotype g = genotype.getValue(); - + for ( final Genotype g : vc.getGenotypes() ) { if ( g.isCalled() ) { Set filters = new LinkedHashSet(g.getFilters()); @@ -301,9 +298,9 @@ public class VariantFiltrationWalker extends RodWalker { if ( VariantContextUtils.match(vc, g, exp) ) filters.add(exp.name); } - genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased())); + genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.isPhased())); } else { - genotypes.put(genotype.getKey(), g); + genotypes.add(g); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index d0f45092b..b81c1d4c3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -45,8 +45,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { public enum Model { /** The default model with the best performance in all cases */ EXACT, - /** For posterity we have kept around the older GRID_SEARCH model, but this gives inferior results and shouldn't be used. */ - GRID_SEARCH } protected int N; @@ -71,7 +69,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { * @param log10AlleleFrequencyPriors priors * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results */ - protected abstract void getLog10PNonRef(Map GLs, List Alleles, + protected abstract void getLog10PNonRef(GenotypeCollection GLs, List Alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index bb2516f7c..f0c73cd5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -50,7 +50,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { super(UAC, N, logger, verboseWriter); } - public void getLog10PNonRef(Map GLs, List alleles, + public void getLog10PNonRef(GenotypeCollection GLs, List alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); @@ -94,11 +94,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - private static final ArrayList getGLs(Map GLs) { + private static final ArrayList getGLs(GenotypeCollection GLs) { ArrayList genotypeLikelihoods = new ArrayList(); genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy - for ( Genotype sample : GLs.values() ) { + for ( Genotype sample : GLs ) { if ( sample.hasLikelihoods() ) { double[] gls = sample.getLikelihoods().getAsVector(); @@ -154,7 +154,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - public int linearExact(Map GLs, + public int linearExact(GenotypeCollection GLs, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { final ArrayList genotypeLikelihoods = getGLs(GLs); @@ -290,16 +290,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // todo = can't deal with optimal dynamic programming solution with multiallelic records if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) { - sampleIndices.addAll(GLs.keySet()); + sampleIndices.addAll(GLs.getSampleNames()); sampleIdx = GLs.size(); } else { - for ( Map.Entry sample : GLs.entrySet() ) { - if ( !sample.getValue().hasLikelihoods() ) + for ( final Genotype genotype : GLs ) { + if ( !genotype.hasLikelihoods() ) continue; - double[] likelihoods = sample.getValue().getLikelihoods().getAsVector(); + double[] likelihoods = genotype.getLikelihoods().getAsVector(); if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) { //System.out.print(sample.getKey()+":"); @@ -311,7 +311,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { continue; } - sampleIndices.add(sample.getKey()); + sampleIndices.add(genotype.getSampleName()); for (int k=0; k <= AFofMaxLikelihood; k++) { @@ -415,17 +415,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { qual = -1.0 * Math.log10(1.0 - chosenGenotype); } //System.out.println(myAlleles.toString()); - calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); + calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); } - for ( Map.Entry sample : GLs.entrySet() ) { - - if ( !sample.getValue().hasLikelihoods() ) + for ( final Genotype genotype : GLs ) { + if ( !genotype.hasLikelihoods() ) continue; - Genotype g = GLs.get(sample.getKey()); + Genotype g = GLs.get(genotype.getSampleName()); - double[] likelihoods = sample.getValue().getLikelihoods().getAsVector(); + double[] likelihoods = genotype.getLikelihoods().getAsVector(); if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL) continue; // regular likelihoods @@ -436,7 +435,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { myAlleles.add(Allele.NO_CALL); myAlleles.add(Allele.NO_CALL); //System.out.println(myAlleles.toString()); - calls.put(sample.getKey(), new Genotype(sample.getKey(), myAlleles, qual, null, g.getAttributes(), false)); + calls.add(new Genotype(genotype.getSampleName(), myAlleles, qual, null, g.getAttributes(), false)); } return calls; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java deleted file mode 100755 index 48df8dcb9..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeCollection; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.io.PrintStream; -import java.util.*; - -public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { - - // for use in optimizing the P(D|AF) calculations: - // how much off from the max likelihoods do we need to be before we can quit calculating? - protected static final double LOG10_OPTIMIZATION_EPSILON = 8.0; - - private AlleleFrequencyMatrix AFMatrix; - - protected GridSearchAFEstimation(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { - super(UAC, N, logger, verboseWriter); - AFMatrix = new AlleleFrequencyMatrix(N); - } - - protected void getLog10PNonRef(Map GLs, List alleles, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { - initializeAFMatrix(GLs); - - // first, calculate for AF=0 (no change to matrix) - log10AlleleFrequencyPosteriors[0] = AFMatrix.getLikelihoodsOfFrequency() + log10AlleleFrequencyPriors[0]; - double maxLikelihoodSeen = log10AlleleFrequencyPosteriors[0]; - - int maxAlleleFrequencyToTest = AFMatrix.getSamples().size() * 2; - - // for each minor allele frequency, calculate log10PofDgivenAFi - for (int i = 1; i <= maxAlleleFrequencyToTest; i++) { - // add one more alternate allele - AFMatrix.incrementFrequency(); - - // calculate new likelihoods - log10AlleleFrequencyPosteriors[i] = AFMatrix.getLikelihoodsOfFrequency() + log10AlleleFrequencyPriors[i]; - - // an optimization to speed up the calculation: if we are beyond the local maximum such - // that subsequent likelihoods won't factor into the confidence score, just quit - if ( maxLikelihoodSeen - log10AlleleFrequencyPosteriors[i] > LOG10_OPTIMIZATION_EPSILON ) - return; - - if ( log10AlleleFrequencyPosteriors[i] > maxLikelihoodSeen ) - maxLikelihoodSeen = log10AlleleFrequencyPosteriors[i]; - } - } - - /** - * Overrides the super class - * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyPosteriors allele frequency results - * @param AFofMaxLikelihood allele frequency of max likelihood - * - * @return calls - */ - protected GenotypeCollection assignGenotypes(VariantContext vc, - double[] log10AlleleFrequencyPosteriors, - int AFofMaxLikelihood) { - if ( !vc.isVariant() ) - throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart()); - - Allele refAllele = vc.getReference(); - Allele altAllele = vc.getAlternateAllele(0); - GenotypeCollection calls = GenotypeCollection.create(); - - // first, the potential alt calls - for ( String sample : AFMatrix.getSamples() ) { - Genotype g = vc.getGenotype(sample); - - // set the genotype and confidence - Pair AFbasedGenotype = AFMatrix.getGenotype(AFofMaxLikelihood, sample); - ArrayList myAlleles = new ArrayList(); - if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) { - myAlleles.add(refAllele); - myAlleles.add(refAllele); - } else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) { - myAlleles.add(refAllele); - myAlleles.add(altAllele); - } else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() ) - myAlleles.add(altAllele); - myAlleles.add(altAllele); - } - - calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, g.getAttributes(), false)); - } - - return calls; - } - - private void initializeAFMatrix(Map GLs) { - AFMatrix.clear(); - - for ( Genotype g : GLs.values() ) { - if ( g.hasLikelihoods() ) - AFMatrix.setLikelihoods(g.getLikelihoods().getAsVector(), g.getSampleName()); - } - } - - protected static class AlleleFrequencyMatrix { - - private double[][] matrix; // allele frequency matrix - private int[] indexes; // matrix to maintain which genotype is active - private int maxN; // total possible frequencies in data - private int frequency; // current frequency - - // data structures necessary to maintain a list of the best genotypes and their scores - private ArrayList samples = new ArrayList(); - private HashMap>> samplesToGenotypesPerAF = new HashMap>>(); - - public AlleleFrequencyMatrix(int N) { - maxN = N; - matrix = new double[N][3]; - indexes = new int[N]; - clear(); - } - - public List getSamples() { return samples; } - - public void clear() { - frequency = 0; - for (int i = 0; i < maxN; i++) - indexes[i] = 0; - samples.clear(); - samplesToGenotypesPerAF.clear(); - } - - public void setLikelihoods(double[] GLs, String sample) { - int index = samples.size(); - samples.add(sample); - matrix[index][GenotypeType.AA.ordinal()] = GLs[0]; - matrix[index][GenotypeType.AB.ordinal()] = GLs[1]; - matrix[index][GenotypeType.BB.ordinal()] = GLs[2]; - } - - public void incrementFrequency() { - int N = samples.size(); - if ( frequency == 2 * N ) - throw new ReviewedStingException("Frequency was incremented past N; how is this possible?"); - frequency++; - - double greedy = VALUE_NOT_CALCULATED; - int greedyIndex = -1; - for (int i = 0; i < N; i++) { - - if ( indexes[i] == GenotypeType.AB.ordinal() ) { - if ( matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()] > greedy ) { - greedy = matrix[i][GenotypeType.BB.ordinal()] - matrix[i][GenotypeType.AB.ordinal()]; - greedyIndex = i; - } - } - else if ( indexes[i] == GenotypeType.AA.ordinal() ) { - if ( matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()] > greedy ) { - greedy = matrix[i][GenotypeType.AB.ordinal()] - matrix[i][GenotypeType.AA.ordinal()]; - greedyIndex = i; - } - // note that we currently don't bother with breaking ties between samples - // (which would be done by looking at the HOM_VAR value) because it's highly - // unlikely that a collision will both occur and that the difference will - // be significant at HOM_VAR... - } - // if this person is already hom var, he can't add another alternate allele - // so we can ignore that case - } - if ( greedyIndex == -1 ) - throw new ReviewedStingException("There is no best choice for a new alternate allele; how is this possible?"); - - if ( indexes[greedyIndex] == GenotypeType.AB.ordinal() ) - indexes[greedyIndex] = GenotypeType.BB.ordinal(); - else - indexes[greedyIndex] = GenotypeType.AB.ordinal(); - } - - public double getLikelihoodsOfFrequency() { - double likelihoods = 0.0; - int N = samples.size(); - for (int i = 0; i < N; i++) - likelihoods += matrix[i][indexes[i]]; - - /* - System.out.println(frequency); - for (int i = 0; i < N; i++) { - System.out.print(samples.get(i)); - for (int j=0; j < 3; j++) { - System.out.print(String.valueOf(matrix[i][j])); - System.out.print(indexes[i] == j ? "* " : " "); - } - System.out.println(); - } - System.out.println(likelihoods); - System.out.println(); - */ - - recordGenotypes(); - - return likelihoods; - } - - public Pair getGenotype(int frequency, String sample) { - return samplesToGenotypesPerAF.get(frequency).get(sample); - } - - private void recordGenotypes() { - HashMap> samplesToGenotypes = new HashMap>(); - - int index = 0; - for ( String sample : samples ) { - int genotype = indexes[index]; - - double score; - - int maxEntry = MathUtils.maxElementIndex(matrix[index]); - // if the max value is for the most likely genotype, we can compute next vs. next best - if ( genotype == maxEntry ) { - if ( genotype == GenotypeType.AA.ordinal() ) - score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AB.ordinal()], matrix[index][GenotypeType.BB.ordinal()]); - else if ( genotype == GenotypeType.AB.ordinal() ) - score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.BB.ordinal()]); - else // ( genotype == GenotypeType.HOM.ordinal() ) - score = matrix[index][genotype] - Math.max(matrix[index][GenotypeType.AA.ordinal()], matrix[index][GenotypeType.AB.ordinal()]); - } - // otherwise, we need to calculate the probability of the genotype - else { - double[] normalized = MathUtils.normalizeFromLog10(matrix[index]); - double chosenGenotype = normalized[genotype]; - score = -1.0 * Math.log10(1.0 - chosenGenotype); - } - - samplesToGenotypes.put(sample, new Pair(genotype, Math.abs(score))); - index++; - } - - samplesToGenotypesPerAF.put(frequency, samplesToGenotypes); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 81310f15a..71ad0d75a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -133,7 +133,7 @@ public class UGCallVariants extends RodWalker { for ( VariantContext vc : VCs ) { if ( variantVC == null && vc.isVariant() ) variantVC = vc; - genotypes.putAll(getGenotypesWithGLs(vc.getGenotypes())); + genotypes.addAll(getGenotypesWithGLs(vc.getGenotypes())); } if ( variantVC == null ) { @@ -143,13 +143,12 @@ public class UGCallVariants extends RodWalker { return new VariantContext("VCwithGLs", variantVC.getChr(), variantVC.getStart(), variantVC.getEnd(), variantVC.getAlleles(), genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, null); } - private static Map getGenotypesWithGLs(Map genotypes) { - Map genotypesWithGLs = new HashMap(); - for ( Map.Entry g : genotypes.entrySet() ) { - if ( g.getValue().hasLikelihoods() && g.getValue().getLikelihoods().getAsVector() != null ) - genotypesWithGLs.put(g.getKey(), g.getValue()); + private static GenotypeCollection getGenotypesWithGLs(GenotypeCollection genotypes) { + GenotypeCollection genotypesWithGLs = GenotypeCollection.create(genotypes.size()); + for ( final Genotype g : genotypes ) { + if ( g.hasLikelihoods() && g.getLikelihoods().getAsVector() != null ) + genotypesWithGLs.add(g); } - return genotypesWithGLs; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 72b88100b..a89545a66 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -281,7 +281,7 @@ public class UnifiedGenotyperEngine { attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth()); attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); - genotypes.put(GL.getSample(), new Genotype(GL.getSample(), noCall, Genotype.NO_NEG_LOG_10PERROR, null, attributes, false)); + genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_NEG_LOG_10PERROR, null, attributes, false)); } GenomeLoc loc = refContext.getLocus(); @@ -811,9 +811,6 @@ public class UnifiedGenotyperEngine { case EXACT: afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter); break; - case GRID_SEARCH: - afcm = new GridSearchAFEstimation(UAC, N, logger, verboseWriter); - break; default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 7425258d4..dda4a7a09 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -1059,15 +1059,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } GenotypeCollection genotypes = GenotypeCollection.create(); - for ( String sample : normalSamples ) { Map attrs = call.makeStatsAttributes(null); if ( call.isCall() ) // we made a call - put actual het genotype here: - genotypes.put(sample,new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false)); + genotypes.add(new Genotype(sample,alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false)); else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all) - genotypes.put(sample,new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false)); + genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrs,false)); } Set filters = null; @@ -1151,11 +1150,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker { GenotypeCollection genotypes = GenotypeCollection.create(); for ( String sample : normalSamples ) { - genotypes.put(sample,new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false)); + genotypes.add(new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsNormal,false)); } for ( String sample : tumorSamples ) { - genotypes.put(sample,new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) ); + genotypes.add(new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_NEG_LOG_10PERROR,null,attrsTumor,false) ); } Set filters = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index 53cfaa3a9..5ae034b0a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -122,7 +122,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { if (useSingleSample != null) { // only want to output context for one sample Genotype sampGt = vc.getGenotype(useSingleSample); if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.]. Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP! - vc = vc.subContextFromGenotypes(sampGt); + vc = vc.subContextFromSample(sampGt.getSampleName()); else // asked for a sample that this vc does not contain, so ignore this vc: return; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index b35c54f94..2a3e353ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -293,7 +293,7 @@ public class PhaseByTransmission extends RodWalker { if (tracker != null) { VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); - GenotypeCollection genotypeCollection = vc.getGenotypes(); + GenotypeCollection genotypeCollection = GenotypeCollection.create(vc.getGenotypes().size()); for (Trio trio : trios) { Genotype mother = vc.getGenotype(trio.getMother()); @@ -306,9 +306,7 @@ public class PhaseByTransmission extends RodWalker { Genotype phasedFather = trioGenotypes.get(1); Genotype phasedChild = trioGenotypes.get(2); - genotypeCollection.put(phasedMother.getSampleName(), phasedMother); - genotypeCollection.put(phasedFather.getSampleName(), phasedFather); - genotypeCollection.put(phasedChild.getSampleName(), phasedChild); + genotypeCollection.add(phasedMother, phasedFather, phasedChild); } VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeCollection); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 132ed1582..82adfe96c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -122,7 +122,8 @@ public class ReadBackedPhasingWalker extends RodWalker samplesToPhase = null; + protected Set + samplesToPhase = null; private GenomeLoc mostDownstreamLocusReached = null; @@ -272,10 +273,10 @@ public class ReadBackedPhasingWalker extends RodWalker KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet(Arrays.asList(PQ_KEY)); - private VariantContext reduceVCToSamples(VariantContext vc, List samplesToPhase) { + private VariantContext reduceVCToSamples(VariantContext vc, Set samplesToPhase) { // for ( String sample : samplesToPhase ) // logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() )); - VariantContext subvc = vc.subContextFromGenotypes(vc.getGenotypes(samplesToPhase).values()); + VariantContext subvc = vc.subContextFromSamples(samplesToPhase); // logger.debug("original VC = " + vc); // logger.debug("sub VC = " + subvc); return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF); @@ -354,9 +355,8 @@ public class ReadBackedPhasingWalker extends RodWalker samplePhaseStats = new TreeMap(); - for (Map.Entry sampGtEntry : sampGenotypes.entrySet()) { - String samp = sampGtEntry.getKey(); - Genotype gt = sampGtEntry.getValue(); + for (final Genotype gt : sampGenotypes) { + String samp = gt.getSampleName(); if (DEBUG) logger.debug("sample = " + samp); if (isUnfilteredCalledDiploidGenotype(gt)) { @@ -1134,7 +1134,7 @@ public class ReadBackedPhasingWalker extends RodWalker(vc.getAttributes()); @@ -1153,7 +1153,7 @@ public class ReadBackedPhasingWalker extends RodWalker 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null; + for (final Genotype g : vc1.getGenotypes()) { + final String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null; switch (g.getType()) { case NO_CALL: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/G1KPhaseITable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/G1KPhaseITable.java index 8cc321ef5..417e340b8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/G1KPhaseITable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/G1KPhaseITable.java @@ -118,7 +118,7 @@ public class G1KPhaseITable extends VariantEvaluator { } // count variants per sample - for (final Genotype g : eval.getGenotypes().values()) { + for (final Genotype g : eval.getGenotypes()) { if ( ! g.isNoCall() && ! g.isHomRef() ) { int count = countsPerSample.get(g.getSampleName()).get(eval.getType()); countsPerSample.get(g.getSampleName()).put(eval.getType(), count + 1); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index bbd3f5f54..70b37f500 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -209,7 +209,7 @@ public class GenotypeConcordance extends VariantEvaluator { //public GenotypeConcordance(VariantEvalWalker parent) { // super(parent); - // discordantInteresting = parent.DISCORDANT_INTERESTING; + // discordantInteresting = parent.DISCORDANT_INTERESTING; //} public String getName() { @@ -277,8 +277,9 @@ public class GenotypeConcordance extends VariantEvaluator { // determine concordance for eval data if (eval != null) { - for (final String sample : eval.getGenotypes().keySet()) { - final Genotype.Type called = eval.getGenotype(sample).getType(); + for (final Genotype g : eval.getGenotypes() ) { + final String sample = g.getSampleName(); + final Genotype.Type called = g.getType(); final Genotype.Type truth; if (!validationIsValidVC || !validation.hasGenotype(sample)) { @@ -299,9 +300,9 @@ public class GenotypeConcordance extends VariantEvaluator { else { final Genotype.Type called = Genotype.Type.NO_CALL; - for (final String sample : validation.getGenotypes().keySet()) { - final Genotype.Type truth = validation.getGenotype(sample).getType(); - detailedStats.incrValue(sample, truth, called); + for (final Genotype g : validation.getGenotypes()) { + final Genotype.Type truth = g.getType(); + detailedStats.incrValue(g.getSampleName(), truth, called); // print out interesting sites /* @@ -410,8 +411,8 @@ class SampleStats implements TableType { public SampleStats(VariantContext vc, int nGenotypeTypes) { this.nGenotypeTypes = nGenotypeTypes; - for (String sample : vc.getGenotypes().keySet()) - concordanceStats.put(sample, new long[nGenotypeTypes][nGenotypeTypes]); + for (final Genotype g : vc.getGenotypes()) + concordanceStats.put(g.getSampleName(), new long[nGenotypeTypes][nGenotypeTypes]); } public SampleStats(int genotypeTypes) { @@ -511,8 +512,8 @@ class SampleSummaryStats implements TableType { public SampleSummaryStats(final VariantContext vc) { concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]); - for( final String sample : vc.getGenotypes().keySet() ) { - concordanceSummary.put(sample, new double[COLUMN_KEYS.length]); + for( final Genotype g : vc.getGenotypes() ) { + concordanceSummary.put(g.getSampleName(), new double[COLUMN_KEYS.length]); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index e51623c3c..e1069d2d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -48,7 +48,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator { float numGenosHere = 0; int numIndsHere = 0; - for (Genotype genotype : vc.getGenotypes().values()) { + for (final Genotype genotype : vc.getGenotypes()) { numIndsHere++; if (!genotype.isNoCall()) { //increment stats for heterozygosity diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index e700a733c..24caed549 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -266,7 +266,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested sample */ public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { - return getSubsetOfVariantContext(vc, Arrays.asList(sampleName)); + return getSubsetOfVariantContext(vc, new HashSet(Arrays.asList(sampleName))); } /** @@ -276,7 +276,7 @@ public class VariantEvalUtils { * @param sampleNames the samples to pull out of the VariantContext * @return a new VariantContext with just the requested samples */ - public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection sampleNames) { + public VariantContext getSubsetOfVariantContext(VariantContext vc, Set sampleNames) { VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles()); HashMap newAts = new HashMap(vcsub.getAttributes()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index f87ec5d3b..0148a71c2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -212,15 +212,15 @@ public class LeftAlignVariants extends RodWalker { // create new Genotype objects GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples()); - for ( Map.Entry genotype : vc.getGenotypes().entrySet() ) { + for ( final Genotype genotype : vc.getGenotypes() ) { List newAlleles = new ArrayList(); - for ( Allele allele : genotype.getValue().getAlleles() ) { + for ( Allele allele : genotype.getAlleles() ) { Allele newA = alleleMap.get(allele); if ( newA == null ) newA = Allele.NO_CALL; newAlleles.add(newA); } - newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles)); + newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles)); } return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 3764a9998..3c92bf00f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -557,7 +557,7 @@ public class SelectVariants extends RodWalker { // check if we find it in the variant rod GenotypeCollection genotypes = vc.getGenotypes(samples); - for (Genotype g : genotypes.values()) { + for (final Genotype g : genotypes) { if (sampleHasVariant(g)) { // There is a variant called (or filtered with not exclude filtered option set) that is not HomRef for at least one of the samples. if (compVCs == null) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index ff7fb3434..27eaa7b5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -130,8 +130,7 @@ public class VariantsToVCF extends RodWalker { // set the appropriate sample name if necessary if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName); - GenotypeCollection genotypes = GenotypeCollection.create(1); - genotypes.put(sampleName, g); + GenotypeCollection genotypes = GenotypeCollection.create(g); vc = VariantContext.modifyGenotypes(vc, genotypes); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index 0da7a100f..d13002642 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -451,7 +451,7 @@ public class StandardVCFWriter extends IndexingVCFWriter { boolean sawGoodGT = false; boolean sawGoodQual = false; boolean sawGenotypeFilter = false; - for ( Genotype g : vc.getGenotypes().values() ) { + for ( final Genotype g : vc.getGenotypes() ) { keys.addAll(g.getAttributes().keySet()); if ( g.isAvailable() ) sawGoodGT = true; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index 302b93da7..4d6f26e87 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -180,7 +180,7 @@ public class VCF3Codec extends AbstractVCFCodec { // add it to the list try { - genotypes.put(sampleName, new Genotype(sampleName, + genotypes.add(new Genotype(sampleName, parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap), GTQual, genotypeFilters, diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 8256c9cac..696b35050 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -209,13 +209,7 @@ public class VCFCodec extends AbstractVCFCodec { // add it to the list try { - genotypes.put(sampleName, - new Genotype(sampleName, - GTalleles, - GTQual, - genotypeFilters, - gtAttributes, - phased)); + genotypes.add(new Genotype(sampleName, GTalleles, GTQual, genotypeFilters, gtAttributes, phased)); } catch (TribbleException e) { throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); } diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java index 9a900d734..a06fe906f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java +++ b/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java @@ -155,12 +155,12 @@ public class GCF { if ( genotypes.isEmpty() ) return VariantContext.NO_GENOTYPES; else { - GenotypeCollection map = GenotypeCollection.create(); + GenotypeCollection map = GenotypeCollection.create(genotypes.size()); for ( int i = 0; i < genotypes.size(); i++ ) { final String sampleName = header.getSample(i); final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap); - map.put(sampleName, g); + map.add(g); } return map; @@ -173,7 +173,7 @@ public class GCF { List genotypes = new ArrayList(nGenotypes); for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null); - for ( Genotype g : vc.getGenotypes().values() ) { + for ( Genotype g : vc.getGenotypes() ) { int i = GCFHeaderBuilder.encodeSample(g.getSampleName()); genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java index a83356647..f12a8e531 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java @@ -67,20 +67,28 @@ public class GenotypeCollection implements List { } public static final GenotypeCollection create(final int nGenotypes) { - return new GenotypeCollection(nGenotypes, true); + return new GenotypeCollection(nGenotypes, false); } // todo -- differentiate between empty constructor and copy constructor // todo -- create constructor (Genotype ... genotypes) public static final GenotypeCollection create(final ArrayList genotypes) { - return new GenotypeCollection(genotypes, true); + return new GenotypeCollection(genotypes, false); + } + + public static final GenotypeCollection create(final Genotype... genotypes) { + return new GenotypeCollection(new ArrayList(Arrays.asList(genotypes)), false); } public static final GenotypeCollection copy(final GenotypeCollection toCopy) { return create(toCopy.genotypes); } + public static final GenotypeCollection copy(final Collection toCopy) { + return create(new ArrayList(toCopy)); + } + // public static final GenotypeMap create(final Collection genotypes) { // if ( genotypes == null ) // return null; // todo -- really should return an empty map @@ -173,6 +181,12 @@ public class GenotypeCollection implements List { return genotypes.add(genotype); } + public boolean add(final Genotype ... genotype) { + checkImmutability(); + invalidateCaches(); + return genotypes.addAll(Arrays.asList(genotype)); + } + @Override public void add(final int i, final Genotype genotype) { throw new UnsupportedOperationException(); @@ -291,7 +305,7 @@ public class GenotypeCollection implements List { return genotypes.toArray(ts); } - public Iterable iterateInOrder(final Iterable sampleNamesInOrder) { + public Iterable iterateInSampleNameOrder(final Iterable sampleNamesInOrder) { return new Iterable() { @Override public Iterator iterator() { @@ -300,6 +314,10 @@ public class GenotypeCollection implements List { }; } + public Iterable iterateInSampleNameOrder() { + return iterateInSampleNameOrder(getSampleNamesOrderedByName()); + } + private final class InOrderIterator implements Iterator { final Iterator sampleNamesInOrder; @@ -322,4 +340,41 @@ public class GenotypeCollection implements List { throw new UnsupportedOperationException(); } } + + public Set getSampleNames() { + buildCache(); + return sampleNameToOffset.keySet(); + } + + public Set getSampleNamesOrderedByName() { + return new TreeSet(getSampleNames()); + } + + public boolean containsSample(final String sample) { + buildCache(); + return sampleNameToOffset.containsKey(sample); + } + + public boolean containsSamples(final Collection samples) { + buildCache(); + return getSampleNames().containsAll(samples); + } + + public GenotypeCollection subsetToSamples( final Collection samples ) { + return subsetToSamples(new HashSet(samples)); + } + + public GenotypeCollection subsetToSamples( final Set samples ) { + if ( samples.size() == genotypes.size() ) + return this; + else if ( samples.isEmpty() ) + return NO_GENOTYPES; + else { + GenotypeCollection subset = create(samples.size()); + for ( final Genotype g : genotypes ) + if ( samples.contains(g.getSampleName()) ) + subset.add(g); + return subset; + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 5bd29a0ce..b394517bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.lang.reflect.Array; import java.util.*; /** @@ -279,7 +280,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { this(source, contig, start, stop, alleles, - GenotypeCollection.create(genotypes), + GenotypeCollection.copy(genotypes), negLog10PError, filters, attributes, null, false); } @@ -423,58 +424,73 @@ public class VariantContext implements Feature { // to enable tribble intergrati // // --------------------------------------------------------------------------------------------------------- - /** - * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype - * genotype and alleles in genotype. This is the right way to test if a single genotype is actually - * variant or not. - * - * @param genotype genotype - * @return vc subcontext - */ - public VariantContext subContextFromGenotypes(Genotype genotype) { - return subContextFromGenotypes(Arrays.asList(genotype)); - } +// /** +// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype +// * genotype and alleles in genotype. This is the right way to test if a single genotype is actually +// * variant or not. +// * +// * @param genotype genotype +// * @return vc subcontext +// * @deprecated replaced by {@link #subContextFromSample(String)} +// */ +// public VariantContext subContextFromGenotypes(Genotype genotype) { +// return subContextFromGenotypes(Arrays.asList(genotype)); +// } +// +// +// /** +// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes +// * genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually +// * variant or not. +// * +// * @param genotypes genotypes +// * @return vc subcontext +// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection)} +// */ +// public VariantContext subContextFromGenotypes(Collection genotypes) { +// return subContextFromGenotypes(genotypes, allelesOfGenotypes(genotypes)) ; +// } +// +// /** +// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes +// * genotypes. Also, the resulting variant context will contain the alleles provided, not only those found in genotypes +// * +// * @param genotypes genotypes +// * @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more +// * @return vc subcontext +// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection, java.util.Collection)} +// */ +// @Deprecated +// public VariantContext subContextFromGenotypes(Collection genotypes, Collection alleles) { +// return new VariantContext(getSource(), contig, start, stop, alleles, +// GenotypeCollection.create(genotypes), +// getNegLog10PError(), +// filtersWereApplied() ? getFilters() : null, +// getAttributes(), +// getReferenceBaseForIndel()); +// } - - /** - * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes - * genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually - * variant or not. - * - * @param genotypes genotypes - * @return vc subcontext - */ - public VariantContext subContextFromGenotypes(Collection genotypes) { - return subContextFromGenotypes(genotypes, allelesOfGenotypes(genotypes)) ; - } - - /** - * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes - * genotypes. Also, the resulting variant context will contain the alleles provided, not only those found in genotypes - * - * @param genotypes genotypes - * @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more - * @return vc subcontext - */ - public VariantContext subContextFromGenotypes(Collection genotypes, Collection alleles) { + public VariantContext subContextFromSamples(Set sampleNames, Collection alleles) { return new VariantContext(getSource(), contig, start, stop, alleles, - GenotypeCollection.create(genotypes), + genotypes.subsetToSamples(sampleNames), getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel()); } - public VariantContext subContextFromSamples(Collection sampleNames, Collection alleles) { - return subContextFromGenotypes(getGenotypes(sampleNames).values(), alleles); - } - - public VariantContext subContextFromSamples(Collection sampleNames) { - return subContextFromGenotypes(getGenotypes(sampleNames).values()); + public VariantContext subContextFromSamples(Set sampleNames) { + GenotypeCollection newGenotypes = genotypes.subsetToSamples(sampleNames); + return new VariantContext(getSource(), contig, start, stop, allelesOfGenotypes(newGenotypes), + newGenotypes, + getNegLog10PError(), + filtersWereApplied() ? getFilters() : null, + getAttributes(), + getReferenceBaseForIndel()); } public VariantContext subContextFromSample(String sampleName) { - return subContextFromGenotypes(getGenotype(sampleName)); + return subContextFromSamples(new HashSet(Arrays.asList(sampleName))); } /** @@ -875,16 +891,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati */ public boolean hasGenotypes() { loadGenotypes(); - return genotypes.size() > 0; + return ! genotypes.isEmpty(); } public boolean hasGenotypes(Collection sampleNames) { loadGenotypes(); - for ( String name : sampleNames ) { - if ( ! genotypes.containsKey(name) ) - return false; - } - return true; + return genotypes.containsSamples(sampleNames); } /** @@ -895,10 +907,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati return genotypes; } - public List getGenotypesSortedByName() { + public Iterable getGenotypesSortedByName() { loadGenotypes(); - Collection types = new TreeMap(genotypes).values(); - return new ArrayList(types); + return genotypes.iterateInSampleNameOrder(); } /** @@ -922,24 +933,23 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ public GenotypeCollection getGenotypes(Collection sampleNames) { - GenotypeCollection map = GenotypeCollection.create(sampleNames.size()); - - for ( String name : sampleNames ) { - if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames); - final Genotype g = getGenotype(name); - if ( g != null ) { - map.put(name, g); - } - } - - return map; + return getGenotypes().subsetToSamples(sampleNames); } + public GenotypeCollection getGenotypes(Set sampleNames) { + return getGenotypes().subsetToSamples(sampleNames); + } + + /** - * @return the set of all sample names in this context + * @return the set of all sample names in this context, not ordered */ public Set getSampleNames() { - return getGenotypes().keySet(); + return getGenotypes().getSampleNames(); + } + + public Set getSampleNamesOrderedByName() { + return getGenotypes().getSampleNamesOrderedByName(); } /** @@ -952,11 +962,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati } public boolean hasGenotype(String sample) { - return getGenotypes().containsKey(sample); + return getGenotypes().containsSample(sample); } public Genotype getGenotype(int ith) { - return getGenotypesSortedByName().get(ith); + return genotypes.get(ith); } @@ -968,7 +978,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati public int getChromosomeCount() { int n = 0; - for ( Genotype g : getGenotypes().values() ) { + for ( final Genotype g : getGenotypes() ) { n += g.isNoCall() ? 0 : g.getPloidy(); } @@ -984,7 +994,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati public int getChromosomeCount(Allele a) { int n = 0; - for ( Genotype g : getGenotypes().values() ) { + for ( final Genotype g : getGenotypes() ) { n += g.getAlleles(a).size(); } @@ -1015,7 +1025,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati if ( genotypeCounts == null ) { genotypeCounts = new int[Genotype.Type.values().length]; - for ( Genotype g : getGenotypes().values() ) { + for ( final Genotype g : getGenotypes() ) { if ( g.isNoCall() ) genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++; else if ( g.isHomRef() ) @@ -1136,7 +1146,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati List reportedAlleles = getAlleles(); Set observedAlleles = new HashSet(); observedAlleles.add(getReference()); - for ( Genotype g : getGenotypes().values() ) { + for ( final Genotype g : getGenotypes() ) { if ( g.isCalled() ) observedAlleles.addAll(g.getAlleles()); } @@ -1285,12 +1295,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati private void validateGenotypes() { if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null"); - for ( Map.Entry elt : this.genotypes.entrySet() ) { - String name = elt.getKey(); - Genotype g = elt.getValue(); - - if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName()); - + for ( final Genotype g : this.genotypes ) { if ( g.isAvailable() ) { for ( Allele gAllele : g.getAlleles() ) { if ( ! hasAllele(gAllele) && gAllele.isCalled() ) @@ -1465,8 +1470,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati Byte refByte = inputVC.getReferenceBaseForIndel(); List alleles = new ArrayList(); - GenotypeCollection genotypes = GenotypeCollection.create(); - GenotypeCollection inputGenotypes = inputVC.getGenotypes(); for (Allele a : inputVC.getAlleles()) { // get bases for current allele and create a new one with trimmed bases @@ -1483,11 +1486,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati } // now we can recreate new genotypes with trimmed alleles - for (String sample : inputVC.getSampleNames()) { - Genotype g = inputGenotypes.get(sample); - + GenotypeCollection genotypes = GenotypeCollection.create(inputVC.getNSamples()); + for (final Genotype g : inputVC.getGenotypes() ) { List inAlleles = g.getAlleles(); - List newGenotypeAlleles = new ArrayList(); + List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); for (Allele a : inAlleles) { if (a.isCalled()) { if (a.isSymbolic()) { @@ -1506,8 +1508,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati newGenotypeAlleles.add(Allele.NO_CALL); } } - genotypes.put(sample, new Genotype(sample, newGenotypeAlleles, g.getNegLog10PError(), - g.getFilters(),g.getAttributes(),g.isPhased())); + genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getNegLog10PError(), + g.getFilters(), g.getAttributes(), g.isPhased())); } @@ -1520,48 +1522,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati } - public ArrayList getTwoAllelesWithHighestAlleleCounts() { - // first idea: get two alleles with highest AC - int maxAC1 = 0, maxAC2=0,maxAC1ind =0, maxAC2ind = 0; - int i=0; - int[] alleleCounts = new int[this.getAlleles().size()]; - ArrayList alleleArray = new ArrayList(); - for (Allele a:this.getAlleles()) { - int ac = this.getChromosomeCount(a); - if (ac >=maxAC1) { - maxAC1 = ac; - maxAC1ind = i; - } - alleleArray.add(a); - alleleCounts[i++] = ac; - } - // now get second best allele - for (i=0; i < alleleCounts.length; i++) { - if (i == maxAC1ind) - continue; - if (alleleCounts[i] >= maxAC2) { - maxAC2 = alleleCounts[i]; - maxAC2ind = i; - } - } - - Allele alleleA, alleleB; - if (alleleArray.get(maxAC1ind).isReference()) { - alleleA = alleleArray.get(maxAC1ind); - alleleB = alleleArray.get(maxAC2ind); - } - else if (alleleArray.get(maxAC2ind).isReference()) { - alleleA = alleleArray.get(maxAC2ind); - alleleB = alleleArray.get(maxAC1ind); - } else { - alleleA = alleleArray.get(maxAC1ind); - alleleB = alleleArray.get(maxAC2ind); - } - ArrayList a = new ArrayList(); - a.add(alleleA); - a.add(alleleB); - return a; - } public Allele getAltAlleleWithHighestAlleleCount() { // first idea: get two alleles with highest AC Allele best = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 89db22bd6..afe48f4f5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -70,7 +70,7 @@ public class VariantContextUtils { * @return VariantContext object */ public static VariantContext toVC(String name, GenomeLoc loc, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { - return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, GenotypeCollection.create(genotypes), negLog10PError, filters, attributes); + return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, GenotypeCollection.copy(genotypes), negLog10PError, filters, attributes); } /** @@ -351,10 +351,9 @@ public class VariantContextUtils { // Genotypes final GenotypeCollection genotypes = GenotypeCollection.create(vc.getNSamples()); - for ( final Genotype g : vc.getGenotypes().values() ) { + for ( final Genotype g : vc.getGenotypes() ) { Map genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve); - genotypes.put(g.getSampleName(), - new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(), + genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.getFilters(), genotypeAttributes, g.isPhased())); } @@ -682,9 +681,9 @@ public class VariantContextUtils { if (!hasNullAlleles) return inputVC; // now we can recreate new genotypes with trimmed alleles - for ( Map.Entry sample : inputVC.getGenotypes().entrySet() ) { + for ( final Genotype genotype : inputVC.getGenotypes() ) { - List originalAlleles = sample.getValue().getAlleles(); + List originalAlleles = genotype.getAlleles(); List trimmedAlleles = new ArrayList(); for ( Allele a : originalAlleles ) { if ( a.isCalled() ) @@ -692,7 +691,7 @@ public class VariantContextUtils { else trimmedAlleles.add(Allele.NO_CALL); } - genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles)); + genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles)); } return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0])); @@ -705,8 +704,8 @@ public class VariantContextUtils { public static GenotypeCollection stripPLs(GenotypeCollection genotypes) { GenotypeCollection newGs = GenotypeCollection.create(genotypes.size()); - for ( Map.Entry g : genotypes.entrySet() ) { - newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue()); + for ( final Genotype g : genotypes ) { + newGs.add(g.hasLikelihoods() ? removePLs(g) : g); } return newGs; @@ -884,9 +883,9 @@ public class VariantContextUtils { } private static void mergeGenotypes(GenotypeCollection mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { - for ( Genotype g : oneVC.getGenotypes().values() ) { + for ( Genotype g : oneVC.getGenotypes() ) { String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); - if ( ! mergedGenotypes.containsKey(name) ) { + if ( ! mergedGenotypes.containsSample(name) ) { // only add if the name is new Genotype newG = g; @@ -895,7 +894,7 @@ public class VariantContextUtils { newG = new Genotype(name, alleles, g.getNegLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased()); } - mergedGenotypes.put(name, newG); + mergedGenotypes.add(newG); } } } @@ -924,15 +923,15 @@ public class VariantContextUtils { // create new Genotype objects GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples()); - for ( Map.Entry genotype : vc.getGenotypes().entrySet() ) { + for ( final Genotype genotype : vc.getGenotypes() ) { List newAlleles = new ArrayList(); - for ( Allele allele : genotype.getValue().getAlleles() ) { + for ( Allele allele : genotype.getAlleles() ) { Allele newAllele = alleleMap.get(allele); if ( newAllele == null ) newAllele = Allele.NO_CALL; newAlleles.add(newAllele); } - newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles)); + newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles)); } return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); @@ -944,13 +943,13 @@ public class VariantContextUtils { return vc; GenotypeCollection newGenotypes = GenotypeCollection.create(vc.getNSamples()); - for ( Map.Entry genotype : vc.getGenotypes().entrySet() ) { + for ( final Genotype genotype : vc.getGenotypes() ) { Map attrs = new HashMap(); - for ( Map.Entry attr : genotype.getValue().getAttributes().entrySet() ) { + for ( Map.Entry attr : genotype.getAttributes().entrySet() ) { if ( allowedAttributes.contains(attr.getKey()) ) attrs.put(attr.getKey(), attr.getValue()); } - newGenotypes.put(genotype.getKey(), Genotype.modifyAttributes(genotype.getValue(), attrs)); + newGenotypes.add(Genotype.modifyAttributes(genotype, attrs)); } return VariantContext.modifyGenotypes(vc, newGenotypes); @@ -1023,10 +1022,8 @@ public class VariantContextUtils { MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added GenotypeCollection mergedGenotypes = GenotypeCollection.create(); - for (Map.Entry gt1Entry : vc1.getGenotypes().entrySet()) { - String sample = gt1Entry.getKey(); - Genotype gt1 = gt1Entry.getValue(); - Genotype gt2 = vc2.getGenotype(sample); + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List site1Alleles = gt1.getAlleles(); List site2Alleles = gt2.getAlleles(); @@ -1052,8 +1049,8 @@ public class VariantContextUtils { if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); - Genotype mergedGt = new Genotype(sample, mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); - mergedGenotypes.put(sample, mergedGt); + Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); + mergedGenotypes.add(mergedGt); } String mergedName = VariantContextUtils.mergeVariantContextNames(vc1.getSource(), vc2.getSource()); @@ -1197,8 +1194,7 @@ public class VariantContextUtils { } private static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) { - for (Map.Entry gtEntry : vc.getGenotypes().entrySet()) { - Genotype gt = gtEntry.getValue(); + for (final Genotype gt : vc.getGenotypes()) { if (gt.isNoCall() || gt.isFiltered()) return false; } @@ -1210,10 +1206,8 @@ public class VariantContextUtils { private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: - for (Map.Entry gt1Entry : vc1.getGenotypes().entrySet()) { - String sample = gt1Entry.getKey(); - Genotype gt1 = gt1Entry.getValue(); - Genotype gt2 = vc2.getGenotype(sample); + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom return false; @@ -1275,10 +1269,8 @@ public class VariantContextUtils { */ public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { - for (Map.Entry gt1Entry : vc1.getGenotypes().entrySet()) { - String sample = gt1Entry.getKey(); - Genotype gt1 = gt1Entry.getValue(); - Genotype gt2 = vc2.getGenotype(sample); + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List site1Alleles = gt1.getAlleles(); List site2Alleles = gt2.getAlleles(); @@ -1309,10 +1301,8 @@ public class VariantContextUtils { allele2ToAllele1.put(vc2.getReference(), vc1.getReference()); // Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples). - for (Map.Entry gt1Entry : vc1.getGenotypes().entrySet()) { - String sample = gt1Entry.getKey(); - Genotype gt1 = gt1Entry.getValue(); - Genotype gt2 = vc2.getGenotype(sample); + for (final Genotype gt1 : vc1.getGenotypes()) { + Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List site1Alleles = gt1.getAlleles(); List site2Alleles = gt2.getAlleles(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java index 7607049db..181516f33 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java @@ -37,13 +37,14 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.List; +import java.util.*; /** * Test routine for new VariantContext object @@ -93,7 +94,7 @@ public class TestVariantContextWalker extends RodWalker { if ( writer != null && n == 0 ) { if ( ! wroteHeader ) { - writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc)); + writer.writeHeader(createVCFHeader(vc)); wroteHeader = true; } @@ -115,6 +116,10 @@ public class TestVariantContextWalker extends RodWalker { } } + private static VCFHeader createVCFHeader(VariantContext vc) { + return new VCFHeader(new HashSet(), vc.getGenotypes().getSampleNamesSorted()); + } + public Integer reduceInit() { return 0; } diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index d698d12a3..f60418155 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -122,7 +122,7 @@ public class VCFWriterUnitTest extends BaseTest { List alleles = new ArrayList(); Set filters = null; Map attributes = new HashMap(); - GenotypeCollection genotypes = GenotypeCollection.create(); + GenotypeCollection genotypes = GenotypeCollection.create(header.getGenotypeSamples().size()); alleles.add(Allele.create("-",true)); alleles.add(Allele.create("CC",false)); @@ -133,7 +133,7 @@ public class VCFWriterUnitTest extends BaseTest { gtattributes.put("BB","1"); Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true); - genotypes.put(name,gt); + genotypes.add(gt); } return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A'); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index f5e485587..b21a32174 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -99,7 +99,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; return new VariantContext(source, "1", start, stop, alleles, - GenotypeCollection.create(genotypes), 1.0, filters, null, Cref.getBases()[0]); + GenotypeCollection.copy(genotypes), 1.0, filters, null, Cref.getBases()[0]); } // -------------------------------------------------------------------------------- @@ -509,7 +509,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { } // necessary to not overload equals for genotypes - private void assertGenotypesAreMostlyEqual(Map actual, Map expected) { + private void assertGenotypesAreMostlyEqual(GenotypeCollection actual, GenotypeCollection expected) { if (actual == expected) { return; } @@ -522,10 +522,8 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size()); } - for (Map.Entry entry : actual.entrySet()) { - String key = entry.getKey(); - Genotype value = entry.getValue(); - Genotype expectedValue = expected.get(key); + for (Genotype value : actual) { + Genotype expectedValue = expected.get(value.getSampleName()); Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal"); Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError(), "GQ values aren't equal"); @@ -545,7 +543,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); // test genotypes - Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet(Arrays.asList("s1.1", "s1.2"))); + Assert.assertEquals(merged.getSampleNames(), new HashSet(Arrays.asList("s1.1", "s1.2"))); } @Test(expectedExceptions = UserException.class)