diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index ee08cfa3b..84a4a3120 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -51,7 +51,7 @@ public class SampleList extends InfoFieldAnnotation { return null; StringBuffer samples = new StringBuffer(); - for ( Genotype genotype : vc.getGenotypesSortedByName() ) { + for ( Genotype genotype : vc.getGenotypesOrderedByName() ) { if ( genotype.isCalled() && !genotype.isHomRef() ){ if ( samples.length() > 0 ) samples.append(","); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index 3b2de4769..77a02874d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -31,9 +31,9 @@ import java.util.*; */ public class GenotypesContext implements List { public final static GenotypesContext NO_GENOTYPES = - new GenotypesContext(new ArrayList(0), new HashMap(0), new HashSet(0), true); + new GenotypesContext(new ArrayList(0), new HashMap(0), Collections.emptyList(), true); - Set sampleNamesInOrder = null; + List sampleNamesInOrder = null; Map sampleNameToOffset = null; boolean cacheIsInvalid = true; List genotypes; @@ -62,7 +62,7 @@ public class GenotypesContext implements List { private GenotypesContext(final ArrayList genotypes, final Map sampleNameToOffset, - final Set sampleNamesInOrder, + final List sampleNamesInOrder, final boolean immutable) { this.genotypes = genotypes; this.immutable = immutable; @@ -152,7 +152,7 @@ public class GenotypesContext implements List { private void buildCache() { cacheIsInvalid = 
false; - sampleNamesInOrder = new TreeSet(); + sampleNamesInOrder = new ArrayList(genotypes.size()); sampleNameToOffset = new HashMap(genotypes.size()); for ( int i = 0; i < genotypes.size(); i++ ) { @@ -160,6 +160,7 @@ public class GenotypesContext implements List { sampleNamesInOrder.add(g.getSampleName()); sampleNameToOffset.put(g.getSampleName(), i); } + Collections.sort(sampleNamesInOrder); } @@ -354,7 +355,7 @@ public class GenotypesContext implements List { return sampleNameToOffset.keySet(); } - public Set getSampleNamesOrderedByName() { + public List getSampleNamesOrderedByName() { buildCache(); return sampleNamesInOrder; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 75e3aac58..7798e259c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -985,11 +985,16 @@ public class VariantContext implements Feature { // to enable tribble intergrati return genotypes; } - public Iterable getGenotypesSortedByName() { + public Iterable getGenotypesOrderedByName() { loadGenotypes(); return genotypes.iterateInSampleNameOrder(); } + public Iterable getGenotypesOrderedBy(Iterable sampleOrdering) { + loadGenotypes(); + return genotypes.iterateInSampleNameOrder(sampleOrdering); + } + /** * Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map * for consistency with the multi-get function. 
@@ -1026,7 +1031,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati return getGenotypes().getSampleNames(); } - public Set getSampleNamesOrderedByName() { + public List getSampleNamesOrderedByName() { return getGenotypes().getSampleNamesOrderedByName(); } @@ -1049,7 +1054,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati /** - * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS + * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS) * * @return chromosome count */ @@ -1057,7 +1062,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati int n = 0; for ( final Genotype g : getGenotypes() ) { - n += g.isNoCall() ? 0 : g.getPloidy(); + for ( final Allele a : g.getAlleles() ) + n += a.isNoCall() ? 0 : 1; } return n; @@ -1086,7 +1092,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return true if it's monomorphic */ public boolean isMonomorphic() { - return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples()); + return ! 
isVariant() || (hasGenotypes() && getChromosomeCount(getReference()) == getChromosomeCount()); } /** @@ -1104,16 +1110,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati genotypeCounts = new int[Genotype.Type.values().length]; for ( final Genotype g : getGenotypes() ) { - if ( g.isNoCall() ) - genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++; - else if ( g.isHomRef() ) - genotypeCounts[Genotype.Type.HOM_REF.ordinal()]++; - else if ( g.isHet() ) - genotypeCounts[Genotype.Type.HET.ordinal()]++; - else if ( g.isHomVar() ) - genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++; - else - genotypeCounts[Genotype.Type.MIXED.ordinal()]++; + genotypeCounts[g.getType().ordinal()]++; } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java index 273b8fdf7..fae7cb05a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java @@ -209,7 +209,7 @@ public class VariantContextBenchmark extends SimpleBenchmark { public void run(final VariantContext vc) { ; // TODO - TEST IS BROKEN // int n = 0; -// for ( final Genotype g: vc.getGenotypesSortedByName() ) n++; +// for ( final Genotype g: vc.getGenotypesOrderedByName() ) n++; } }; @@ -335,7 +335,7 @@ public class VariantContextBenchmark extends SimpleBenchmark { // return new FunctionToBenchmark() { // public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { // ; // TODO - TEST IS BROKEN -// //vc.getGenotypesSortedByName(); +// //vc.getGenotypesOrderedByName(); // } // }; // diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 
f2eb2dd57..5bc72e132 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -263,7 +263,7 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertFalse(vc.isMonomorphic()); Assert.assertTrue(vc.isPolymorphic()); Assert.assertEquals(vc.getGenotype("foo"), g); - Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called + Assert.assertEquals(vc.getChromosomeCount(), 1); // only 1 chromosome is called; the NO_CALL allele is excluded from the count Assert.assertEquals(vc.getChromosomeCount(Aref), 0); Assert.assertEquals(vc.getChromosomeCount(C), 1); Assert.assertFalse(vc.getGenotype("foo").isHet()); @@ -690,9 +690,6 @@ public class VariantContextUnitTest extends BaseTest { return SubContextTest.getTests(SubContextTest.class); } - private final static void SubContextTest() { - } - @Test(dataProvider = "SubContextTest") public void runSubContextTest(SubContextTest cfg) { Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref), 10); @@ -734,4 +731,117 @@ public class VariantContextUnitTest extends BaseTest { // same sample names => success Assert.assertEquals(sub.getGenotypes().getSampleNames(), expectedGC.getSampleNames()); } + + // -------------------------------------------------------------------------------- + // + // Test sample name functions + // + // -------------------------------------------------------------------------------- + private class SampleNamesTest extends TestDataProvider { + List sampleNames; + List sampleNamesInOrder; + + private SampleNamesTest(List sampleNames, List sampleNamesInOrder) { + super(SampleNamesTest.class); + this.sampleNamesInOrder = sampleNamesInOrder; + this.sampleNames = sampleNames; + } + + public String toString() { + return String.format("%s samples=%s order=%s", super.toString(), sampleNames, 
sampleNamesInOrder); + } + } + + @DataProvider(name = "SampleNamesTest") + public Object[][] MakeSampleNamesTest() { + new SampleNamesTest(Arrays.asList("1"), Arrays.asList("1")); + new SampleNamesTest(Arrays.asList("2", "1"), Arrays.asList("1", "2")); + new SampleNamesTest(Arrays.asList("1", "2"), Arrays.asList("1", "2")); + new SampleNamesTest(Arrays.asList("1", "2", "3"), Arrays.asList("1", "2", "3")); + new SampleNamesTest(Arrays.asList("2", "1", "3"), Arrays.asList("1", "2", "3")); + new SampleNamesTest(Arrays.asList("2", "3", "1"), Arrays.asList("1", "2", "3")); + new SampleNamesTest(Arrays.asList("3", "1", "2"), Arrays.asList("1", "2", "3")); + new SampleNamesTest(Arrays.asList("3", "2", "1"), Arrays.asList("1", "2", "3")); + new SampleNamesTest(Arrays.asList("NA2", "NA1"), Arrays.asList("NA1", "NA2")); + return SampleNamesTest.getTests(SampleNamesTest.class); + } + + private final static void assertGenotypesAreInOrder(Iterable gIt, List names) { + int i = 0; + for ( final Genotype g : gIt ) { + Assert.assertEquals(g.getSampleName(), names.get(i), "Unexpected genotype ordering"); + i++; + } + } + + + @Test(dataProvider = "SampleNamesTest") + public void runSampleNamesTest(SampleNamesTest cfg) { + GenotypesContext gc = GenotypesContext.create(cfg.sampleNames.size()); + for ( final String name : cfg.sampleNames ) { + gc.add(new Genotype(name, Arrays.asList(Aref, T))); + } + + VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, + snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc); + + // same sample names => success + Assert.assertEquals(vc.getSampleNames(), new HashSet(cfg.sampleNames), "vc.getSampleNames() = " + vc.getSampleNames()); + Assert.assertEquals(vc.getSampleNamesOrderedByName(), cfg.sampleNamesInOrder, "vc.getSampleNamesOrderedByName() = " + vc.getSampleNamesOrderedByName()); + + assertGenotypesAreInOrder(vc.getGenotypesOrderedByName(), cfg.sampleNamesInOrder); + 
assertGenotypesAreInOrder(vc.getGenotypesOrderedBy(cfg.sampleNames), cfg.sampleNames); + } + + @Test + public void testGenotypeCounting() { + Genotype noCall = new Genotype("nocall", Arrays.asList(Allele.NO_CALL)); + Genotype mixed = new Genotype("mixed", Arrays.asList(Aref, Allele.NO_CALL)); + Genotype homRef = new Genotype("homRef", Arrays.asList(Aref, Aref)); + Genotype het = new Genotype("het", Arrays.asList(Aref, T)); + Genotype homVar = new Genotype("homVar", Arrays.asList(T, T)); + + List allGenotypes = Arrays.asList(noCall, mixed, homRef, het, homVar); + final int nCycles = allGenotypes.size() * 10; + + for ( int i = 0; i < nCycles; i++ ) { + int nNoCall = 0, nNoCallAlleles = 0, nA = 0, nT = 0, nMixed = 0, nHomRef = 0, nHet = 0, nHomVar = 0; + int nSamples = 0; + GenotypesContext gc = GenotypesContext.create(); + for ( int j = 0; j < i; j++ ) { + nSamples++; + Genotype g = allGenotypes.get(j % allGenotypes.size()); + gc.add(g); + switch ( g.getType() ) { + case NO_CALL: nNoCall++; nNoCallAlleles++; break; + case HOM_REF: nA += 2; nHomRef++; break; + case HET: nA++; nT++; nHet++; break; + case HOM_VAR: nT += 2; nHomVar++; break; + case MIXED: nA++; nNoCallAlleles++; nMixed++; break; + default: throw new RuntimeException("Unexpected genotype type " + g.getType()); + } + + } + + VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, + snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc); + + Assert.assertEquals(vc.getNSamples(), nSamples); + if ( nSamples > 0 ) { + Assert.assertEquals(vc.isPolymorphic(), nT > 0); + Assert.assertEquals(vc.isMonomorphic(), nT == 0); + } + Assert.assertEquals(vc.getChromosomeCount(), nA + nT); + + Assert.assertEquals(vc.getChromosomeCount(Allele.NO_CALL), nNoCallAlleles); + Assert.assertEquals(vc.getChromosomeCount(Aref), nA); + Assert.assertEquals(vc.getChromosomeCount(T), nT); + + Assert.assertEquals(vc.getNoCallCount(), nNoCall); + Assert.assertEquals(vc.getHomRefCount(), nHomRef); + 
Assert.assertEquals(vc.getHetCount(), nHet); + Assert.assertEquals(vc.getHomVarCount(), nHomVar); + Assert.assertEquals(vc.getMixedCount(), nMixed); + } + } } \ No newline at end of file