Unit tests for VariantContext: chrCount, getSampleNames, and the ordering functions

-- Major change to how chromosomeCounts is computed: NO_CALL alleles are now always excluded. As a result, ChromosomeCounts(A/.) is now 1; previously it would have been 2.
-- Naming changes for getSamplesNameInOrder()
This commit is contained in:
Mark DePristo 2011-11-17 20:37:22 -05:00
parent 02f22cc9f8
commit fa454c88bb
5 changed files with 135 additions and 27 deletions

View File

@ -51,7 +51,7 @@ public class SampleList extends InfoFieldAnnotation {
return null;
StringBuffer samples = new StringBuffer();
for ( Genotype genotype : vc.getGenotypesSortedByName() ) {
for ( Genotype genotype : vc.getGenotypesOrderedByName() ) {
if ( genotype.isCalled() && !genotype.isHomRef() ){
if ( samples.length() > 0 )
samples.append(",");

View File

@ -31,9 +31,9 @@ import java.util.*;
*/
public class GenotypesContext implements List<Genotype> {
public final static GenotypesContext NO_GENOTYPES =
new GenotypesContext(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), new HashSet<String>(0), true);
new GenotypesContext(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), Collections.<String>emptyList(), true);
Set<String> sampleNamesInOrder = null;
List<String> sampleNamesInOrder = null;
Map<String, Integer> sampleNameToOffset = null;
boolean cacheIsInvalid = true;
List<Genotype> genotypes;
@ -62,7 +62,7 @@ public class GenotypesContext implements List<Genotype> {
private GenotypesContext(final ArrayList<Genotype> genotypes,
final Map<String, Integer> sampleNameToOffset,
final Set<String> sampleNamesInOrder,
final List<String> sampleNamesInOrder,
final boolean immutable) {
this.genotypes = genotypes;
this.immutable = immutable;
@ -152,7 +152,7 @@ public class GenotypesContext implements List<Genotype> {
private void buildCache() {
cacheIsInvalid = false;
sampleNamesInOrder = new TreeSet<String>();
sampleNamesInOrder = new ArrayList<String>(genotypes.size());
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
for ( int i = 0; i < genotypes.size(); i++ ) {
@ -160,6 +160,7 @@ public class GenotypesContext implements List<Genotype> {
sampleNamesInOrder.add(g.getSampleName());
sampleNameToOffset.put(g.getSampleName(), i);
}
Collections.sort(sampleNamesInOrder);
}
@ -354,7 +355,7 @@ public class GenotypesContext implements List<Genotype> {
return sampleNameToOffset.keySet();
}
public Set<String> getSampleNamesOrderedByName() {
public List<String> getSampleNamesOrderedByName() {
buildCache();
return sampleNamesInOrder;
}

View File

@ -985,11 +985,16 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return genotypes;
}
public Iterable<Genotype> getGenotypesSortedByName() {
public Iterable<Genotype> getGenotypesOrderedByName() {
loadGenotypes();
return genotypes.iterateInSampleNameOrder();
}
/**
 * Returns an iteration over this context's genotypes driven by the supplied sample ordering.
 *
 * Calls loadGenotypes() first, then delegates to
 * genotypes.iterateInSampleNameOrder(sampleOrdering).
 *
 * NOTE(review): how names that are absent from this context are handled is decided by
 * iterateInSampleNameOrder, which is not visible here -- confirm before relying on it.
 *
 * @param sampleOrdering sample names, in the order the genotypes should be visited
 * @return the Iterable produced by iterateInSampleNameOrder for that ordering
 */
public Iterable<Genotype> getGenotypesOrderedBy(Iterable<String> sampleOrdering) {
loadGenotypes();
return genotypes.iterateInSampleNameOrder(sampleOrdering);
}
/**
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
* for consistency with the multi-get function.
@ -1026,7 +1031,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return getGenotypes().getSampleNames();
}
public Set<String> getSampleNamesOrderedByName() {
public List<String> getSampleNamesOrderedByName() {
return getGenotypes().getSampleNamesOrderedByName();
}
@ -1049,7 +1054,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
/**
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
*
* @return chromosome count
*/
@ -1057,7 +1062,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
int n = 0;
for ( final Genotype g : getGenotypes() ) {
n += g.isNoCall() ? 0 : g.getPloidy();
for ( final Allele a : g.getAlleles() )
n += a.isNoCall() ? 0 : 1;
}
return n;
@ -1086,7 +1092,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return true if it's monomorphic
*/
public boolean isMonomorphic() {
return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples());
return ! isVariant() || (hasGenotypes() && getChromosomeCount(getReference()) == getChromosomeCount());
}
/**
@ -1104,16 +1110,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
genotypeCounts = new int[Genotype.Type.values().length];
for ( final Genotype g : getGenotypes() ) {
if ( g.isNoCall() )
genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++;
else if ( g.isHomRef() )
genotypeCounts[Genotype.Type.HOM_REF.ordinal()]++;
else if ( g.isHet() )
genotypeCounts[Genotype.Type.HET.ordinal()]++;
else if ( g.isHomVar() )
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
else
genotypeCounts[Genotype.Type.MIXED.ordinal()]++;
genotypeCounts[g.getType().ordinal()]++;
}
}
}

View File

@ -209,7 +209,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
public void run(final VariantContext vc) {
; // TODO - TEST IS BROKEN
// int n = 0;
// for ( final Genotype g: vc.getGenotypesSortedByName() ) n++;
// for ( final Genotype g: vc.getGenotypesOrderedByName() ) n++;
}
};
@ -335,7 +335,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
// return new FunctionToBenchmark<org.broadinstitute.sting.utils.variantcontext.v13.VariantContext>() {
// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) {
// ; // TODO - TEST IS BROKEN
// //vc.getGenotypesSortedByName();
// //vc.getGenotypesOrderedByName();
// }
// };
//

View File

@ -263,7 +263,7 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertFalse(vc.isMonomorphic());
Assert.assertTrue(vc.isPolymorphic());
Assert.assertEquals(vc.getGenotype("foo"), g);
Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called
Assert.assertEquals(vc.getChromosomeCount(), 1); // we only have 1 called chromosomes, we exclude the NO_CALL one isn't called
Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
Assert.assertEquals(vc.getChromosomeCount(C), 1);
Assert.assertFalse(vc.getGenotype("foo").isHet());
@ -690,9 +690,6 @@ public class VariantContextUnitTest extends BaseTest {
return SubContextTest.getTests(SubContextTest.class);
}
private final static void SubContextTest() {
}
@Test(dataProvider = "SubContextTest")
public void runSubContextTest(SubContextTest cfg) {
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref), 10);
@ -734,4 +731,117 @@ public class VariantContextUnitTest extends BaseTest {
// same sample names => success
Assert.assertEquals(sub.getGenotypes().getSampleNames(), expectedGC.getSampleNames());
}
// --------------------------------------------------------------------------------
//
// Test sample name functions
//
// --------------------------------------------------------------------------------
private class SampleNamesTest extends TestDataProvider {
List<String> sampleNames;
List<String> sampleNamesInOrder;
private SampleNamesTest(List<String> sampleNames, List<String> sampleNamesInOrder) {
super(SampleNamesTest.class);
this.sampleNamesInOrder = sampleNamesInOrder;
this.sampleNames = sampleNames;
}
public String toString() {
return String.format("%s samples=%s order=%s", super.toString(), sampleNames, sampleNamesInOrder);
}
}
/**
 * Creates the SampleNamesTest cases and returns them through
 * SampleNamesTest.getTests(SampleNamesTest.class).
 */
@DataProvider(name = "SampleNamesTest")
public Object[][] MakeSampleNamesTest() {
    // each row is { names in insertion order, names in expected sorted order }
    final String[][][] cases = {
            { { "1" },           { "1" } },
            { { "2", "1" },      { "1", "2" } },
            { { "1", "2" },      { "1", "2" } },
            { { "1", "2", "3" }, { "1", "2", "3" } },
            { { "2", "1", "3" }, { "1", "2", "3" } },
            { { "2", "3", "1" }, { "1", "2", "3" } },
            { { "3", "1", "2" }, { "1", "2", "3" } },
            { { "3", "2", "1" }, { "1", "2", "3" } },
            { { "NA2", "NA1" },  { "NA1", "NA2" } },
    };

    // instances are created only for their construction side effect; the result is not kept
    for ( final String[][] row : cases ) {
        new SampleNamesTest(Arrays.asList(row[0]), Arrays.asList(row[1]));
    }

    return SampleNamesTest.getTests(SampleNamesTest.class);
}
/**
 * Asserts that iterating {@code gIt} yields exactly one genotype per entry of {@code names},
 * with sample names matching {@code names} position by position.
 *
 * @param gIt   the genotype iteration under test
 * @param names the expected sample names, in the expected iteration order
 */
private final static void assertGenotypesAreInOrder(Iterable<Genotype> gIt, List<String> names) {
    int i = 0;
    for ( final Genotype g : gIt ) {
        // report surplus genotypes as an assertion failure rather than an IndexOutOfBoundsException
        Assert.assertTrue(i < names.size(), "More genotypes than expected sample names");
        Assert.assertEquals(g.getSampleName(), names.get(i), "Unexpected genotype ordering");
        i++;
    }
    // without this check an iteration that yields too few (or no) genotypes would pass vacuously
    Assert.assertEquals(i, names.size(), "Unexpected number of genotypes");
}
/**
 * Builds a VariantContext holding one Aref/T genotype per configured sample name and checks
 * the sample-name accessors and the ordered genotype iterations against the configuration.
 */
@Test(dataProvider = "SampleNamesTest")
public void runSampleNamesTest(SampleNamesTest cfg) {
    final List<String> insertionOrder = cfg.sampleNames;
    final List<String> expectedSorted = cfg.sampleNamesInOrder;

    final GenotypesContext genotypes = GenotypesContext.create(insertionOrder.size());
    for ( final String sample : insertionOrder )
        genotypes.add(new Genotype(sample, Arrays.asList(Aref, T)));

    final VariantContext context = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc,
            snpLocStart, snpLocStop, Arrays.asList(Aref, T), genotypes);

    // the plain name accessor is compared as a set, so insertion order is irrelevant
    Assert.assertEquals(context.getSampleNames(), new HashSet<String>(insertionOrder),
            "vc.getSampleNames() = " + context.getSampleNames());

    // the ordered accessor must match the expected sorted list exactly
    Assert.assertEquals(context.getSampleNamesOrderedByName(), expectedSorted,
            "vc.getSampleNamesOrderedByName() = " + context.getSampleNamesOrderedByName());

    // genotype iteration by name, then by a caller-supplied ordering
    assertGenotypesAreInOrder(context.getGenotypesOrderedByName(), expectedSorted);
    assertGenotypesAreInOrder(context.getGenotypesOrderedBy(insertionOrder), insertionOrder);
}
/**
 * Compares VariantContext's chromosome counts and per-type genotype counts with tallies kept
 * by hand while genotype collections of increasing size are assembled.
 */
@Test
public void testGenotypeCounting() {
    // one genotype for each case handled by the switch below; reused cyclically
    final List<Genotype> pool = Arrays.asList(
            new Genotype("nocall", Arrays.asList(Allele.NO_CALL)),
            new Genotype("mixed", Arrays.asList(Aref, Allele.NO_CALL)),
            new Genotype("homRef", Arrays.asList(Aref, Aref)),
            new Genotype("het", Arrays.asList(Aref, T)),
            new Genotype("homVar", Arrays.asList(T, T)));
    final int poolSize = pool.size();
    final int rounds = poolSize * 10;

    for ( int target = 0; target < rounds; target++ ) {
        // expected allele tallies
        int refAlleles = 0, altAlleles = 0, noCallAlleles = 0;
        // expected genotype-type tallies
        int noCalls = 0, mixeds = 0, homRefs = 0, hets = 0, homVars = 0;
        int added = 0;

        final GenotypesContext genotypes = GenotypesContext.create();
        while ( added < target ) {
            final Genotype next = pool.get(added % poolSize);
            added++;
            genotypes.add(next);

            switch ( next.getType() ) {
                case NO_CALL:
                    noCalls++;
                    noCallAlleles++;
                    break;
                case HOM_REF:
                    refAlleles += 2;
                    homRefs++;
                    break;
                case HET:
                    refAlleles++;
                    altAlleles++;
                    hets++;
                    break;
                case HOM_VAR:
                    altAlleles += 2;
                    homVars++;
                    break;
                case MIXED:
                    refAlleles++;
                    noCallAlleles++;
                    mixeds++;
                    break;
                default:
                    throw new RuntimeException("Unexpected genotype type " + next.getType());
            }
        }

        final VariantContext context = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc,
                snpLocStart, snpLocStop, Arrays.asList(Aref, T), genotypes);

        Assert.assertEquals(context.getNSamples(), added);

        // polymorphism is only checked once at least one genotype is present
        if ( added > 0 ) {
            Assert.assertEquals(context.isPolymorphic(), altAlleles > 0);
            Assert.assertEquals(context.isMonomorphic(), altAlleles == 0);
        }

        // the total chromosome count is expected to exclude NO_CALL alleles
        Assert.assertEquals(context.getChromosomeCount(), refAlleles + altAlleles);
        Assert.assertEquals(context.getChromosomeCount(Allele.NO_CALL), noCallAlleles);
        Assert.assertEquals(context.getChromosomeCount(Aref), refAlleles);
        Assert.assertEquals(context.getChromosomeCount(T), altAlleles);

        Assert.assertEquals(context.getNoCallCount(), noCalls);
        Assert.assertEquals(context.getHomRefCount(), homRefs);
        Assert.assertEquals(context.getHetCount(), hets);
        Assert.assertEquals(context.getHomVarCount(), homVars);
        Assert.assertEquals(context.getMixedCount(), mixeds);
    }
}
}