Unit tests for VariantContext: chromosome counts, getSampleNames, and the name-ordering functions
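-- Major change to how chromosome counts are computed: NO_CALL alleles are now always excluded, so getChromosomeCount() for an A/. genotype is 1, whereas the previous result would have been 2. -- Naming changes for the name-ordered accessors (getSamplesNameInOrder()): getGenotypesSortedByName() is now getGenotypesOrderedByName(), and getSampleNamesOrderedByName() now returns a List<String> instead of a Set<String>.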
This commit is contained in:
parent
02f22cc9f8
commit
fa454c88bb
|
|
@ -51,7 +51,7 @@ public class SampleList extends InfoFieldAnnotation {
|
|||
return null;
|
||||
|
||||
StringBuffer samples = new StringBuffer();
|
||||
for ( Genotype genotype : vc.getGenotypesSortedByName() ) {
|
||||
for ( Genotype genotype : vc.getGenotypesOrderedByName() ) {
|
||||
if ( genotype.isCalled() && !genotype.isHomRef() ){
|
||||
if ( samples.length() > 0 )
|
||||
samples.append(",");
|
||||
|
|
|
|||
|
|
@ -31,9 +31,9 @@ import java.util.*;
|
|||
*/
|
||||
public class GenotypesContext implements List<Genotype> {
|
||||
public final static GenotypesContext NO_GENOTYPES =
|
||||
new GenotypesContext(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), new HashSet<String>(0), true);
|
||||
new GenotypesContext(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), Collections.<String>emptyList(), true);
|
||||
|
||||
Set<String> sampleNamesInOrder = null;
|
||||
List<String> sampleNamesInOrder = null;
|
||||
Map<String, Integer> sampleNameToOffset = null;
|
||||
boolean cacheIsInvalid = true;
|
||||
List<Genotype> genotypes;
|
||||
|
|
@ -62,7 +62,7 @@ public class GenotypesContext implements List<Genotype> {
|
|||
|
||||
private GenotypesContext(final ArrayList<Genotype> genotypes,
|
||||
final Map<String, Integer> sampleNameToOffset,
|
||||
final Set<String> sampleNamesInOrder,
|
||||
final List<String> sampleNamesInOrder,
|
||||
final boolean immutable) {
|
||||
this.genotypes = genotypes;
|
||||
this.immutable = immutable;
|
||||
|
|
@ -152,7 +152,7 @@ public class GenotypesContext implements List<Genotype> {
|
|||
|
||||
private void buildCache() {
|
||||
cacheIsInvalid = false;
|
||||
sampleNamesInOrder = new TreeSet<String>();
|
||||
sampleNamesInOrder = new ArrayList<String>(genotypes.size());
|
||||
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
|
||||
|
||||
for ( int i = 0; i < genotypes.size(); i++ ) {
|
||||
|
|
@ -160,6 +160,7 @@ public class GenotypesContext implements List<Genotype> {
|
|||
sampleNamesInOrder.add(g.getSampleName());
|
||||
sampleNameToOffset.put(g.getSampleName(), i);
|
||||
}
|
||||
Collections.sort(sampleNamesInOrder);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -354,7 +355,7 @@ public class GenotypesContext implements List<Genotype> {
|
|||
return sampleNameToOffset.keySet();
|
||||
}
|
||||
|
||||
public Set<String> getSampleNamesOrderedByName() {
|
||||
public List<String> getSampleNamesOrderedByName() {
|
||||
buildCache();
|
||||
return sampleNamesInOrder;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -985,11 +985,16 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return genotypes;
|
||||
}
|
||||
|
||||
public Iterable<Genotype> getGenotypesSortedByName() {
|
||||
public Iterable<Genotype> getGenotypesOrderedByName() {
|
||||
loadGenotypes();
|
||||
return genotypes.iterateInSampleNameOrder();
|
||||
}
|
||||
|
||||
public Iterable<Genotype> getGenotypesOrderedBy(Iterable<String> sampleOrdering) {
|
||||
loadGenotypes();
|
||||
return genotypes.iterateInSampleNameOrder(sampleOrdering);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
|
||||
* for consistency with the multi-get function.
|
||||
|
|
@ -1026,7 +1031,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return getGenotypes().getSampleNames();
|
||||
}
|
||||
|
||||
public Set<String> getSampleNamesOrderedByName() {
|
||||
public List<String> getSampleNamesOrderedByName() {
|
||||
return getGenotypes().getSampleNamesOrderedByName();
|
||||
}
|
||||
|
||||
|
|
@ -1049,7 +1054,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
|
||||
/**
|
||||
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS
|
||||
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
|
||||
*
|
||||
* @return chromosome count
|
||||
*/
|
||||
|
|
@ -1057,7 +1062,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
int n = 0;
|
||||
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
n += g.isNoCall() ? 0 : g.getPloidy();
|
||||
for ( final Allele a : g.getAlleles() )
|
||||
n += a.isNoCall() ? 0 : 1;
|
||||
}
|
||||
|
||||
return n;
|
||||
|
|
@ -1086,7 +1092,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return true if it's monomorphic
|
||||
*/
|
||||
public boolean isMonomorphic() {
|
||||
return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples());
|
||||
return ! isVariant() || (hasGenotypes() && getChromosomeCount(getReference()) == getChromosomeCount());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1104,16 +1110,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
genotypeCounts = new int[Genotype.Type.values().length];
|
||||
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
if ( g.isNoCall() )
|
||||
genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++;
|
||||
else if ( g.isHomRef() )
|
||||
genotypeCounts[Genotype.Type.HOM_REF.ordinal()]++;
|
||||
else if ( g.isHet() )
|
||||
genotypeCounts[Genotype.Type.HET.ordinal()]++;
|
||||
else if ( g.isHomVar() )
|
||||
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
|
||||
else
|
||||
genotypeCounts[Genotype.Type.MIXED.ordinal()]++;
|
||||
genotypeCounts[g.getType().ordinal()]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
public void run(final VariantContext vc) {
|
||||
; // TODO - TEST IS BROKEN
|
||||
// int n = 0;
|
||||
// for ( final Genotype g: vc.getGenotypesSortedByName() ) n++;
|
||||
// for ( final Genotype g: vc.getGenotypesOrderedByName() ) n++;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -335,7 +335,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
// return new FunctionToBenchmark<org.broadinstitute.sting.utils.variantcontext.v13.VariantContext>() {
|
||||
// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) {
|
||||
// ; // TODO - TEST IS BROKEN
|
||||
// //vc.getGenotypesSortedByName();
|
||||
// //vc.getGenotypesOrderedByName();
|
||||
// }
|
||||
// };
|
||||
//
|
||||
|
|
|
|||
|
|
@ -263,7 +263,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Assert.assertFalse(vc.isMonomorphic());
|
||||
Assert.assertTrue(vc.isPolymorphic());
|
||||
Assert.assertEquals(vc.getGenotype("foo"), g);
|
||||
Assert.assertEquals(vc.getChromosomeCount(), 2); // we know that there are 2 chromosomes, even though one isn't called
|
||||
Assert.assertEquals(vc.getChromosomeCount(), 1); // only 1 chromosome is called; the NO_CALL allele is excluded from the count
|
||||
Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
|
||||
Assert.assertEquals(vc.getChromosomeCount(C), 1);
|
||||
Assert.assertFalse(vc.getGenotype("foo").isHet());
|
||||
|
|
@ -690,9 +690,6 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
return SubContextTest.getTests(SubContextTest.class);
|
||||
}
|
||||
|
||||
private final static void SubContextTest() {
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SubContextTest")
|
||||
public void runSubContextTest(SubContextTest cfg) {
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref), 10);
|
||||
|
|
@ -734,4 +731,117 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
// same sample names => success
|
||||
Assert.assertEquals(sub.getGenotypes().getSampleNames(), expectedGC.getSampleNames());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test sample name functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
private class SampleNamesTest extends TestDataProvider {
|
||||
List<String> sampleNames;
|
||||
List<String> sampleNamesInOrder;
|
||||
|
||||
private SampleNamesTest(List<String> sampleNames, List<String> sampleNamesInOrder) {
|
||||
super(SampleNamesTest.class);
|
||||
this.sampleNamesInOrder = sampleNamesInOrder;
|
||||
this.sampleNames = sampleNames;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s samples=%s order=%s", super.toString(), sampleNames, sampleNamesInOrder);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "SampleNamesTest")
|
||||
public Object[][] MakeSampleNamesTest() {
|
||||
new SampleNamesTest(Arrays.asList("1"), Arrays.asList("1"));
|
||||
new SampleNamesTest(Arrays.asList("2", "1"), Arrays.asList("1", "2"));
|
||||
new SampleNamesTest(Arrays.asList("1", "2"), Arrays.asList("1", "2"));
|
||||
new SampleNamesTest(Arrays.asList("1", "2", "3"), Arrays.asList("1", "2", "3"));
|
||||
new SampleNamesTest(Arrays.asList("2", "1", "3"), Arrays.asList("1", "2", "3"));
|
||||
new SampleNamesTest(Arrays.asList("2", "3", "1"), Arrays.asList("1", "2", "3"));
|
||||
new SampleNamesTest(Arrays.asList("3", "1", "2"), Arrays.asList("1", "2", "3"));
|
||||
new SampleNamesTest(Arrays.asList("3", "2", "1"), Arrays.asList("1", "2", "3"));
|
||||
new SampleNamesTest(Arrays.asList("NA2", "NA1"), Arrays.asList("NA1", "NA2"));
|
||||
return SampleNamesTest.getTests(SampleNamesTest.class);
|
||||
}
|
||||
|
||||
private final static void assertGenotypesAreInOrder(Iterable<Genotype> gIt, List<String> names) {
|
||||
int i = 0;
|
||||
for ( final Genotype g : gIt ) {
|
||||
Assert.assertEquals(g.getSampleName(), names.get(i), "Unexpected genotype ordering");
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "SampleNamesTest")
|
||||
public void runSampleNamesTest(SampleNamesTest cfg) {
|
||||
GenotypesContext gc = GenotypesContext.create(cfg.sampleNames.size());
|
||||
for ( final String name : cfg.sampleNames ) {
|
||||
gc.add(new Genotype(name, Arrays.asList(Aref, T)));
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc,
|
||||
snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc);
|
||||
|
||||
// same sample names => success
|
||||
Assert.assertEquals(vc.getSampleNames(), new HashSet<String>(cfg.sampleNames), "vc.getSampleNames() = " + vc.getSampleNames());
|
||||
Assert.assertEquals(vc.getSampleNamesOrderedByName(), cfg.sampleNamesInOrder, "vc.getSampleNamesOrderedByName() = " + vc.getSampleNamesOrderedByName());
|
||||
|
||||
assertGenotypesAreInOrder(vc.getGenotypesOrderedByName(), cfg.sampleNamesInOrder);
|
||||
assertGenotypesAreInOrder(vc.getGenotypesOrderedBy(cfg.sampleNames), cfg.sampleNames);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypeCounting() {
|
||||
Genotype noCall = new Genotype("nocall", Arrays.asList(Allele.NO_CALL));
|
||||
Genotype mixed = new Genotype("mixed", Arrays.asList(Aref, Allele.NO_CALL));
|
||||
Genotype homRef = new Genotype("homRef", Arrays.asList(Aref, Aref));
|
||||
Genotype het = new Genotype("het", Arrays.asList(Aref, T));
|
||||
Genotype homVar = new Genotype("homVar", Arrays.asList(T, T));
|
||||
|
||||
List<Genotype> allGenotypes = Arrays.asList(noCall, mixed, homRef, het, homVar);
|
||||
final int nCycles = allGenotypes.size() * 10;
|
||||
|
||||
for ( int i = 0; i < nCycles; i++ ) {
|
||||
int nNoCall = 0, nNoCallAlleles = 0, nA = 0, nT = 0, nMixed = 0, nHomRef = 0, nHet = 0, nHomVar = 0;
|
||||
int nSamples = 0;
|
||||
GenotypesContext gc = GenotypesContext.create();
|
||||
for ( int j = 0; j < i; j++ ) {
|
||||
nSamples++;
|
||||
Genotype g = allGenotypes.get(j % allGenotypes.size());
|
||||
gc.add(g);
|
||||
switch ( g.getType() ) {
|
||||
case NO_CALL: nNoCall++; nNoCallAlleles++; break;
|
||||
case HOM_REF: nA += 2; nHomRef++; break;
|
||||
case HET: nA++; nT++; nHet++; break;
|
||||
case HOM_VAR: nT += 2; nHomVar++; break;
|
||||
case MIXED: nA++; nNoCallAlleles++; nMixed++; break;
|
||||
default: throw new RuntimeException("Unexpected genotype type " + g.getType());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc,
|
||||
snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc);
|
||||
|
||||
Assert.assertEquals(vc.getNSamples(), nSamples);
|
||||
if ( nSamples > 0 ) {
|
||||
Assert.assertEquals(vc.isPolymorphic(), nT > 0);
|
||||
Assert.assertEquals(vc.isMonomorphic(), nT == 0);
|
||||
}
|
||||
Assert.assertEquals(vc.getChromosomeCount(), nA + nT);
|
||||
|
||||
Assert.assertEquals(vc.getChromosomeCount(Allele.NO_CALL), nNoCallAlleles);
|
||||
Assert.assertEquals(vc.getChromosomeCount(Aref), nA);
|
||||
Assert.assertEquals(vc.getChromosomeCount(T), nT);
|
||||
|
||||
Assert.assertEquals(vc.getNoCallCount(), nNoCall);
|
||||
Assert.assertEquals(vc.getHomRefCount(), nHomRef);
|
||||
Assert.assertEquals(vc.getHetCount(), nHet);
|
||||
Assert.assertEquals(vc.getHomVarCount(), nHomVar);
|
||||
Assert.assertEquals(vc.getMixedCount(), nMixed);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue