Unit tests for VariantContext chromosome counting, getSampleNames, and genotype/sample ordering functions
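-- Major change to how chromosomeCounts is computed: NO_CALL alleles are now always excluded, so ChromosomeCounts(A/.) is 1, whereas the previous result would have been 2. -- Naming changes for the ordered-by-name accessors (getSamplesNameInOrder(); e.g. getGenotypesSortedByName() is now getGenotypesOrderedByName())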
This commit is contained in:
parent
02f22cc9f8
commit
fa454c88bb
|
|
@ -51,7 +51,7 @@ public class SampleList extends InfoFieldAnnotation {
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
StringBuffer samples = new StringBuffer();
|
StringBuffer samples = new StringBuffer();
|
||||||
for ( Genotype genotype : vc.getGenotypesSortedByName() ) {
|
for ( Genotype genotype : vc.getGenotypesOrderedByName() ) {
|
||||||
if ( genotype.isCalled() && !genotype.isHomRef() ){
|
if ( genotype.isCalled() && !genotype.isHomRef() ){
|
||||||
if ( samples.length() > 0 )
|
if ( samples.length() > 0 )
|
||||||
samples.append(",");
|
samples.append(",");
|
||||||
|
|
|
||||||
|
|
@ -31,9 +31,9 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class GenotypesContext implements List<Genotype> {
|
public class GenotypesContext implements List<Genotype> {
|
||||||
public final static GenotypesContext NO_GENOTYPES =
|
public final static GenotypesContext NO_GENOTYPES =
|
||||||
new GenotypesContext(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), new HashSet<String>(0), true);
|
new GenotypesContext(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), Collections.<String>emptyList(), true);
|
||||||
|
|
||||||
Set<String> sampleNamesInOrder = null;
|
List<String> sampleNamesInOrder = null;
|
||||||
Map<String, Integer> sampleNameToOffset = null;
|
Map<String, Integer> sampleNameToOffset = null;
|
||||||
boolean cacheIsInvalid = true;
|
boolean cacheIsInvalid = true;
|
||||||
List<Genotype> genotypes;
|
List<Genotype> genotypes;
|
||||||
|
|
@ -62,7 +62,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
|
|
||||||
private GenotypesContext(final ArrayList<Genotype> genotypes,
|
private GenotypesContext(final ArrayList<Genotype> genotypes,
|
||||||
final Map<String, Integer> sampleNameToOffset,
|
final Map<String, Integer> sampleNameToOffset,
|
||||||
final Set<String> sampleNamesInOrder,
|
final List<String> sampleNamesInOrder,
|
||||||
final boolean immutable) {
|
final boolean immutable) {
|
||||||
this.genotypes = genotypes;
|
this.genotypes = genotypes;
|
||||||
this.immutable = immutable;
|
this.immutable = immutable;
|
||||||
|
|
@ -152,7 +152,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
|
|
||||||
private void buildCache() {
|
private void buildCache() {
|
||||||
cacheIsInvalid = false;
|
cacheIsInvalid = false;
|
||||||
sampleNamesInOrder = new TreeSet<String>();
|
sampleNamesInOrder = new ArrayList<String>(genotypes.size());
|
||||||
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
|
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
|
||||||
|
|
||||||
for ( int i = 0; i < genotypes.size(); i++ ) {
|
for ( int i = 0; i < genotypes.size(); i++ ) {
|
||||||
|
|
@ -160,6 +160,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
sampleNamesInOrder.add(g.getSampleName());
|
sampleNamesInOrder.add(g.getSampleName());
|
||||||
sampleNameToOffset.put(g.getSampleName(), i);
|
sampleNameToOffset.put(g.getSampleName(), i);
|
||||||
}
|
}
|
||||||
|
Collections.sort(sampleNamesInOrder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -354,7 +355,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
return sampleNameToOffset.keySet();
|
return sampleNameToOffset.keySet();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set<String> getSampleNamesOrderedByName() {
|
public List<String> getSampleNamesOrderedByName() {
|
||||||
buildCache();
|
buildCache();
|
||||||
return sampleNamesInOrder;
|
return sampleNamesInOrder;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -985,11 +985,16 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
return genotypes;
|
return genotypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterable<Genotype> getGenotypesSortedByName() {
|
public Iterable<Genotype> getGenotypesOrderedByName() {
|
||||||
loadGenotypes();
|
loadGenotypes();
|
||||||
return genotypes.iterateInSampleNameOrder();
|
return genotypes.iterateInSampleNameOrder();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Iterable<Genotype> getGenotypesOrderedBy(Iterable<String> sampleOrdering) {
|
||||||
|
loadGenotypes();
|
||||||
|
return genotypes.iterateInSampleNameOrder(sampleOrdering);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
|
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
|
||||||
* for consistency with the multi-get function.
|
* for consistency with the multi-get function.
|
||||||
|
|
@ -1026,7 +1031,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
return getGenotypes().getSampleNames();
|
return getGenotypes().getSampleNames();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set<String> getSampleNamesOrderedByName() {
|
public List<String> getSampleNamesOrderedByName() {
|
||||||
return getGenotypes().getSampleNamesOrderedByName();
|
return getGenotypes().getSampleNamesOrderedByName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1049,7 +1054,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS
|
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
|
||||||
*
|
*
|
||||||
* @return chromosome count
|
* @return chromosome count
|
||||||
*/
|
*/
|
||||||
|
|
@ -1057,7 +1062,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
int n = 0;
|
int n = 0;
|
||||||
|
|
||||||
for ( final Genotype g : getGenotypes() ) {
|
for ( final Genotype g : getGenotypes() ) {
|
||||||
n += g.isNoCall() ? 0 : g.getPloidy();
|
for ( final Allele a : g.getAlleles() )
|
||||||
|
n += a.isNoCall() ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return n;
|
return n;
|
||||||
|
|
@ -1086,7 +1092,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
* @return true if it's monomorphic
|
* @return true if it's monomorphic
|
||||||
*/
|
*/
|
||||||
public boolean isMonomorphic() {
|
public boolean isMonomorphic() {
|
||||||
return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples());
|
return ! isVariant() || (hasGenotypes() && getChromosomeCount(getReference()) == getChromosomeCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -1104,16 +1110,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
genotypeCounts = new int[Genotype.Type.values().length];
|
genotypeCounts = new int[Genotype.Type.values().length];
|
||||||
|
|
||||||
for ( final Genotype g : getGenotypes() ) {
|
for ( final Genotype g : getGenotypes() ) {
|
||||||
if ( g.isNoCall() )
|
genotypeCounts[g.getType().ordinal()]++;
|
||||||
genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++;
|
|
||||||
else if ( g.isHomRef() )
|
|
||||||
genotypeCounts[Genotype.Type.HOM_REF.ordinal()]++;
|
|
||||||
else if ( g.isHet() )
|
|
||||||
genotypeCounts[Genotype.Type.HET.ordinal()]++;
|
|
||||||
else if ( g.isHomVar() )
|
|
||||||
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
|
|
||||||
else
|
|
||||||
genotypeCounts[Genotype.Type.MIXED.ordinal()]++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -209,7 +209,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
||||||
public void run(final VariantContext vc) {
|
public void run(final VariantContext vc) {
|
||||||
; // TODO - TEST IS BROKEN
|
; // TODO - TEST IS BROKEN
|
||||||
// int n = 0;
|
// int n = 0;
|
||||||
// for ( final Genotype g: vc.getGenotypesSortedByName() ) n++;
|
// for ( final Genotype g: vc.getGenotypesOrderedByName() ) n++;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -335,7 +335,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
||||||
// return new FunctionToBenchmark<org.broadinstitute.sting.utils.variantcontext.v13.VariantContext>() {
|
// return new FunctionToBenchmark<org.broadinstitute.sting.utils.variantcontext.v13.VariantContext>() {
|
||||||
// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) {
|
// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) {
|
||||||
// ; // TODO - TEST IS BROKEN
|
// ; // TODO - TEST IS BROKEN
|
||||||
// //vc.getGenotypesSortedByName();
|
// //vc.getGenotypesOrderedByName();
|
||||||
// }
|
// }
|
||||||
// };
|
// };
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -263,7 +263,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
Assert.assertFalse(vc.isMonomorphic());
|
Assert.assertFalse(vc.isMonomorphic());
|
||||||
Assert.assertTrue(vc.isPolymorphic());
|
Assert.assertTrue(vc.isPolymorphic());
|
||||||
Assert.assertEquals(vc.getGenotype("foo"), g);
|
Assert.assertEquals(vc.getGenotype("foo"), g);
|
||||||
Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called
|
Assert.assertEquals(vc.getChromosomeCount(), 1); // only 1 chromosome is called; the NO_CALL allele is excluded from the count
|
||||||
Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
|
Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
|
||||||
Assert.assertEquals(vc.getChromosomeCount(C), 1);
|
Assert.assertEquals(vc.getChromosomeCount(C), 1);
|
||||||
Assert.assertFalse(vc.getGenotype("foo").isHet());
|
Assert.assertFalse(vc.getGenotype("foo").isHet());
|
||||||
|
|
@ -690,9 +690,6 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
return SubContextTest.getTests(SubContextTest.class);
|
return SubContextTest.getTests(SubContextTest.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static void SubContextTest() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(dataProvider = "SubContextTest")
|
@Test(dataProvider = "SubContextTest")
|
||||||
public void runSubContextTest(SubContextTest cfg) {
|
public void runSubContextTest(SubContextTest cfg) {
|
||||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref), 10);
|
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref), 10);
|
||||||
|
|
@ -734,4 +731,117 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
// same sample names => success
|
// same sample names => success
|
||||||
Assert.assertEquals(sub.getGenotypes().getSampleNames(), expectedGC.getSampleNames());
|
Assert.assertEquals(sub.getGenotypes().getSampleNames(), expectedGC.getSampleNames());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Test sample name functions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
private class SampleNamesTest extends TestDataProvider {
|
||||||
|
List<String> sampleNames;
|
||||||
|
List<String> sampleNamesInOrder;
|
||||||
|
|
||||||
|
private SampleNamesTest(List<String> sampleNames, List<String> sampleNamesInOrder) {
|
||||||
|
super(SampleNamesTest.class);
|
||||||
|
this.sampleNamesInOrder = sampleNamesInOrder;
|
||||||
|
this.sampleNames = sampleNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%s samples=%s order=%s", super.toString(), sampleNames, sampleNamesInOrder);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "SampleNamesTest")
|
||||||
|
public Object[][] MakeSampleNamesTest() {
|
||||||
|
new SampleNamesTest(Arrays.asList("1"), Arrays.asList("1"));
|
||||||
|
new SampleNamesTest(Arrays.asList("2", "1"), Arrays.asList("1", "2"));
|
||||||
|
new SampleNamesTest(Arrays.asList("1", "2"), Arrays.asList("1", "2"));
|
||||||
|
new SampleNamesTest(Arrays.asList("1", "2", "3"), Arrays.asList("1", "2", "3"));
|
||||||
|
new SampleNamesTest(Arrays.asList("2", "1", "3"), Arrays.asList("1", "2", "3"));
|
||||||
|
new SampleNamesTest(Arrays.asList("2", "3", "1"), Arrays.asList("1", "2", "3"));
|
||||||
|
new SampleNamesTest(Arrays.asList("3", "1", "2"), Arrays.asList("1", "2", "3"));
|
||||||
|
new SampleNamesTest(Arrays.asList("3", "2", "1"), Arrays.asList("1", "2", "3"));
|
||||||
|
new SampleNamesTest(Arrays.asList("NA2", "NA1"), Arrays.asList("NA1", "NA2"));
|
||||||
|
return SampleNamesTest.getTests(SampleNamesTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final static void assertGenotypesAreInOrder(Iterable<Genotype> gIt, List<String> names) {
|
||||||
|
int i = 0;
|
||||||
|
for ( final Genotype g : gIt ) {
|
||||||
|
Assert.assertEquals(g.getSampleName(), names.get(i), "Unexpected genotype ordering");
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(dataProvider = "SampleNamesTest")
|
||||||
|
public void runSampleNamesTest(SampleNamesTest cfg) {
|
||||||
|
GenotypesContext gc = GenotypesContext.create(cfg.sampleNames.size());
|
||||||
|
for ( final String name : cfg.sampleNames ) {
|
||||||
|
gc.add(new Genotype(name, Arrays.asList(Aref, T)));
|
||||||
|
}
|
||||||
|
|
||||||
|
VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc,
|
||||||
|
snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc);
|
||||||
|
|
||||||
|
// same sample names => success
|
||||||
|
Assert.assertEquals(vc.getSampleNames(), new HashSet<String>(cfg.sampleNames), "vc.getSampleNames() = " + vc.getSampleNames());
|
||||||
|
Assert.assertEquals(vc.getSampleNamesOrderedByName(), cfg.sampleNamesInOrder, "vc.getSampleNamesOrderedByName() = " + vc.getSampleNamesOrderedByName());
|
||||||
|
|
||||||
|
assertGenotypesAreInOrder(vc.getGenotypesOrderedByName(), cfg.sampleNamesInOrder);
|
||||||
|
assertGenotypesAreInOrder(vc.getGenotypesOrderedBy(cfg.sampleNames), cfg.sampleNames);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGenotypeCounting() {
|
||||||
|
Genotype noCall = new Genotype("nocall", Arrays.asList(Allele.NO_CALL));
|
||||||
|
Genotype mixed = new Genotype("mixed", Arrays.asList(Aref, Allele.NO_CALL));
|
||||||
|
Genotype homRef = new Genotype("homRef", Arrays.asList(Aref, Aref));
|
||||||
|
Genotype het = new Genotype("het", Arrays.asList(Aref, T));
|
||||||
|
Genotype homVar = new Genotype("homVar", Arrays.asList(T, T));
|
||||||
|
|
||||||
|
List<Genotype> allGenotypes = Arrays.asList(noCall, mixed, homRef, het, homVar);
|
||||||
|
final int nCycles = allGenotypes.size() * 10;
|
||||||
|
|
||||||
|
for ( int i = 0; i < nCycles; i++ ) {
|
||||||
|
int nNoCall = 0, nNoCallAlleles = 0, nA = 0, nT = 0, nMixed = 0, nHomRef = 0, nHet = 0, nHomVar = 0;
|
||||||
|
int nSamples = 0;
|
||||||
|
GenotypesContext gc = GenotypesContext.create();
|
||||||
|
for ( int j = 0; j < i; j++ ) {
|
||||||
|
nSamples++;
|
||||||
|
Genotype g = allGenotypes.get(j % allGenotypes.size());
|
||||||
|
gc.add(g);
|
||||||
|
switch ( g.getType() ) {
|
||||||
|
case NO_CALL: nNoCall++; nNoCallAlleles++; break;
|
||||||
|
case HOM_REF: nA += 2; nHomRef++; break;
|
||||||
|
case HET: nA++; nT++; nHet++; break;
|
||||||
|
case HOM_VAR: nT += 2; nHomVar++; break;
|
||||||
|
case MIXED: nA++; nNoCallAlleles++; nMixed++; break;
|
||||||
|
default: throw new RuntimeException("Unexpected genotype type " + g.getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc,
|
||||||
|
snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc);
|
||||||
|
|
||||||
|
Assert.assertEquals(vc.getNSamples(), nSamples);
|
||||||
|
if ( nSamples > 0 ) {
|
||||||
|
Assert.assertEquals(vc.isPolymorphic(), nT > 0);
|
||||||
|
Assert.assertEquals(vc.isMonomorphic(), nT == 0);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(vc.getChromosomeCount(), nA + nT);
|
||||||
|
|
||||||
|
Assert.assertEquals(vc.getChromosomeCount(Allele.NO_CALL), nNoCallAlleles);
|
||||||
|
Assert.assertEquals(vc.getChromosomeCount(Aref), nA);
|
||||||
|
Assert.assertEquals(vc.getChromosomeCount(T), nT);
|
||||||
|
|
||||||
|
Assert.assertEquals(vc.getNoCallCount(), nNoCall);
|
||||||
|
Assert.assertEquals(vc.getHomRefCount(), nHomRef);
|
||||||
|
Assert.assertEquals(vc.getHetCount(), nHet);
|
||||||
|
Assert.assertEquals(vc.getHomVarCount(), nHomVar);
|
||||||
|
Assert.assertEquals(vc.getMixedCount(), nMixed);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue