Changes to the Variation class, updates to SSG, updated tests to match the changes to SSGenotypeCall, and added the ability to run a single integration test using the build script.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1577 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-10 04:31:33 +00:00
parent c988205884
commit 5a64a80ab5
6 changed files with 87 additions and 55 deletions

View File

@ -3,6 +3,7 @@
<property name="source.dir" value="java/src" />
<property name="single" value="*Test" />
<property name="singleintegration" value="*IntegrationTest" />
<property name="dist" value="dist" />
<!-- should our junit test output go to a file or the screen?
@ -271,7 +272,7 @@
<batchtest fork="yes" todir="${report}">
<fileset dir="${test.classes}">
<include name="**/*IntegrationTest.class"/>
<include name="**/${singleintegration}.class"/>
</fileset>
</batchtest>
</junit>

View File

@ -29,23 +29,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
// if this is null, we were constructed with the intention that we'd represent the best genotype
private DiploidGenotype mGenotype = null;
// which genotype to compare to; if we're in discovery mode it's the ref allele, otherwise it's the next best
private DiploidGenotype mCompareTo = null;
// the reference genotype and the next best genotype, lazy loaded
private DiploidGenotype mRefGenotype = null;
private DiploidGenotype mNextGenotype = null;
// are we best vrs ref or best vrs next - for internal consumption only
private final boolean mBestVrsRef;
//private final boolean mBestVrsRef;
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param discovery are we representing the best vrs next or best vrs ref
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
*/
public SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
mBestVrsRef = discovery;
public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
@ -55,14 +54,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param discovery are we representing the best vrs next or best vrs ref
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
*/
SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
mBestVrsRef = discovery;
SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
@ -93,7 +90,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
public String toString() {
lazyEval();
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
getLocation(), mGenotype, mCompareTo, mRefBase, mPileup.getReads().size(),
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
}
@ -105,13 +102,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
}
// our comparison
if (mCompareTo == null) {
if (this.mBestVrsRef) {
mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2));
} else {
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
}
if (mRefGenotype == null) {
mRefGenotype = DiploidGenotype.valueOf(Utils.dupString(this.getReference(), 2));
}
if (mNextGenotype == null) {
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
}
}
@ -123,15 +119,13 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
*/
@Override
public double getNegLog10PError() {
getBestGenotype();
getAltGenotype();
return Math.abs(mGenotypeLikelihoods.getPosterior(mGenotype) - mGenotypeLikelihoods.getPosterior(mCompareTo));
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
}
/**
* get the best genotype
*/
public DiploidGenotype getBestGenotype() {
private DiploidGenotype getBestGenotype() {
lazyEval();
return mGenotype;
}
@ -139,9 +133,17 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
/**
* get the alternate genotype
*/
public DiploidGenotype getAltGenotype() {
private DiploidGenotype getNextBest() {
lazyEval();
return mCompareTo;
return mNextGenotype;
}
/**
* get the reference genotype (the genotype built from the reference base);
* it is lazily evaluated, so lazyEval() is run before the cached value is read
*
* @return the reference genotype
*/
private DiploidGenotype getRefGenotype() {
lazyEval();
return mRefGenotype;
}
/**
@ -209,12 +211,11 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
@Override
public boolean isVariant(char ref) {
return !Utils.dupString(this.getReference(),2).equals(getBestGenotype().toString());
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
}
/**
@ -222,8 +223,9 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
*
* @return
*/
public Variant toVariation() {
return null; // the next step is to implement the variant system
public Variation toVariation() {
double bestRef = Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getRefGenotype()));
return new BasicVariation(this.getBases(), this.getReference(), 0, this.mLocation, bestRef);
}
/**
@ -268,7 +270,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* @return an array in lexicographical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x) {
return new SSGenotypeCall(mBestVrsRef,mLocation,mRefBase,mGenotypeLikelihoods,mPileup,x);
return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
}
/**

View File

@ -104,7 +104,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
gl.add(pileup, true);
gl.validate();
return new SSGenotypeCall(!GENOTYPE, context.getLocation(), ref,gl, pileup);
return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
} else {
return null;
}
@ -145,7 +145,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
}
/**
* If we've found a LOD >= 5 variant, output it to disk.
* If we've found a LOD variant or callable genotype, output it to disk.
*
* @param call an GenotypeCall object for the variant.
* @param sum accumulator for the reduce.
@ -155,15 +155,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
public CallResult reduce(SSGenotypeCall call, CallResult sum) {
sum.nCalledBases++;
// todo -- aaron, fixme -- this should be using variation() in discovery mode and genotype if not
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
if (call.getNegLog10PError() >= LOD_THRESHOLD) {
double confidence = (GENOTYPE) ? call.getNegLog10PError() : call.toVariation().getNegLog10PError();
if (confidence >= LOD_THRESHOLD) {
sum.nConfidentCalls++;
//System.out.printf("Call %s%n", call);
sum.writer.addGenotypeCall(call);
} else {
} else
sum.nNonConfidentCalls++;
}
}
return sum;
}

View File

@ -79,6 +79,6 @@ public interface Genotype {
*
* @return the variant
*/
public Variant toVariation();
public Variation toVariation();
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* @author aaron
* <p/>
@ -8,7 +9,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
* <p/>
* This class represents a variant
*/
public interface Variant {
public interface Variation {
// the types of variants we currently allow
public enum VARIANT_TYPE {
SNP, INDEL, DELETION, REFERENCE // though reference is not really a variant
@ -47,19 +48,48 @@ public interface Variant {
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
public String toBases();
public String getBases();
/**
* get the location that this Variant represents
*
* @return a GenomeLoc
*/
public GenomeLoc getLocation();
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
public String getReference();
/** is our base representation heterozygous */
public boolean isHet();
/** is our base representation homozygous */
public boolean isHom();
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
public double getNegLog10PError();
/**
* are we truly a variant, given a reference
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
*/
public boolean isReference();
/**
* gets the alternate base. If this is homref, throws an UnsupportedOperationException
* @return
*/
public char getAlternateBase();
}

View File

@ -79,43 +79,43 @@ public class SSGenotypeCallTest extends BaseTest {
@Test
public void testBestVrsRefSame() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call = new SSGenotypeCall(true, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(0, call.getNegLog10PError(), 0.0000001);
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsRef2() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call2 = new SSGenotypeCall(true, myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(9, call2.getNegLog10PError(), 0.0000001);
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsRef3() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call3 = new SSGenotypeCall(true, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(4, call3.getNegLog10PError(), 0.0000001);
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsNextSame() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsNext2() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call2 = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsNext3() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call3 = new SSGenotypeCall(false, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
}
}