changes to the variation class, updates to SSG, updated tests based on changes to the SSGenotypeCall, and added the ability to run a single integration test from using the build script.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1577 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-10 04:31:33 +00:00
parent c988205884
commit 5a64a80ab5
6 changed files with 87 additions and 55 deletions

View File

@ -3,6 +3,7 @@
<property name="source.dir" value="java/src" /> <property name="source.dir" value="java/src" />
<property name="single" value="*Test" /> <property name="single" value="*Test" />
<property name="singleintegration" value="*IntegrationTest" />
<property name="dist" value="dist" /> <property name="dist" value="dist" />
<!-- should our junit test output go to a file or the screen? <!-- should our junit test output go to a file or the screen?
@ -271,7 +272,7 @@
<batchtest fork="yes" todir="${report}"> <batchtest fork="yes" todir="${report}">
<fileset dir="${test.classes}"> <fileset dir="${test.classes}">
<include name="**/*IntegrationTest.class"/> <include name="**/${singleintegration}.class"/>
</fileset> </fileset>
</batchtest> </batchtest>
</junit> </junit>

View File

@ -29,23 +29,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
// if this is null, we were constructed with the intention that we'd represent the best genotype // if this is null, we were constructed with the intention that we'd represent the best genotype
private DiploidGenotype mGenotype = null; private DiploidGenotype mGenotype = null;
// which genotype to compare to; if we're in discovery mode it's the ref allele, otherwise it's the next best // the reference genotype and the next best genotype, lazy loaded
private DiploidGenotype mCompareTo = null; private DiploidGenotype mRefGenotype = null;
private DiploidGenotype mNextGenotype = null;
// are we best vrs ref or best vrs next - for internal consumption only // are we best vrs ref or best vrs next - for internal consumption only
private final boolean mBestVrsRef; //private final boolean mBestVrsRef;
/** /**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
* *
* @param discovery are we representing the best vrs next or best vrs ref * @param location the location we're working with
* @param location the location we're working with * @param refBase the ref base
* @param refBase the ref base * @param gtlh the genotype likelihoods object
* @param gtlh the genotype likelihoods object * @param pileup the pile-up of reads at the specified locus
* @param pileup the pile-up of reads at the specified locus
*/ */
public SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) { public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
mBestVrsRef = discovery;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh; mGenotypeLikelihoods = gtlh;
mLocation = location; mLocation = location;
@ -55,14 +54,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
/** /**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
* *
* @param discovery are we representing the best vrs next or best vrs ref * @param location the location we're working with
* @param location the location we're working with * @param refBase the ref base
* @param refBase the ref base * @param gtlh the genotype likelihoods object
* @param gtlh the genotype likelihoods object * @param pileup the pile-up of reads at the specified locus
* @param pileup the pile-up of reads at the specified locus
*/ */
SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) { SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
mBestVrsRef = discovery;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh; mGenotypeLikelihoods = gtlh;
mLocation = location; mLocation = location;
@ -93,7 +90,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
public String toString() { public String toString() {
lazyEval(); lazyEval();
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s", return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
getLocation(), mGenotype, mCompareTo, mRefBase, mPileup.getReads().size(), getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods())); getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
} }
@ -105,13 +102,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
} }
// our comparison // our comparison
if (mCompareTo == null) { if (mRefGenotype == null) {
if (this.mBestVrsRef) { mRefGenotype = DiploidGenotype.valueOf(Utils.dupString(this.getReference(), 2));
mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2)); }
} else { if (mNextGenotype == null) {
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors()); Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
}
} }
} }
@ -123,15 +119,13 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
*/ */
@Override @Override
public double getNegLog10PError() { public double getNegLog10PError() {
getBestGenotype(); return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
getAltGenotype();
return Math.abs(mGenotypeLikelihoods.getPosterior(mGenotype) - mGenotypeLikelihoods.getPosterior(mCompareTo));
} }
/** /**
* get the best genotype * get the best genotype
*/ */
public DiploidGenotype getBestGenotype() { private DiploidGenotype getBestGenotype() {
lazyEval(); lazyEval();
return mGenotype; return mGenotype;
} }
@ -139,9 +133,17 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
/** /**
* get the alternate genotype * get the alternate genotype
*/ */
public DiploidGenotype getAltGenotype() { private DiploidGenotype getNextBest() {
lazyEval(); lazyEval();
return mCompareTo; return mNextGenotype;
}
/**
* get the alternate genotype
*/
private DiploidGenotype getRefGenotype() {
lazyEval();
return mRefGenotype;
} }
/** /**
@ -209,12 +211,11 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* given the reference, are we a variant? (non-ref) * given the reference, are we a variant? (non-ref)
* *
* @param ref the reference base or bases * @param ref the reference base or bases
*
* @return true if we're a variant * @return true if we're a variant
*/ */
@Override @Override
public boolean isVariant(char ref) { public boolean isVariant(char ref) {
return !Utils.dupString(this.getReference(),2).equals(getBestGenotype().toString()); return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
} }
/** /**
@ -222,8 +223,9 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* *
* @return * @return
*/ */
public Variant toVariation() { public Variation toVariation() {
return null; // the next step is to implement the variant system double bestRef = Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getRefGenotype()));
return new BasicVariation(this.getBases(), this.getReference(), 0, this.mLocation, bestRef);
} }
/** /**
@ -268,7 +270,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* @return an array in lexigraphical order of the likelihoods * @return an array in lexigraphical order of the likelihoods
*/ */
public Genotype getGenotype(DiploidGenotype x) { public Genotype getGenotype(DiploidGenotype x) {
return new SSGenotypeCall(mBestVrsRef,mLocation,mRefBase,mGenotypeLikelihoods,mPileup,x); return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
} }
/** /**

View File

@ -104,7 +104,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
gl.add(pileup, true); gl.add(pileup, true);
gl.validate(); gl.validate();
return new SSGenotypeCall(!GENOTYPE, context.getLocation(), ref,gl, pileup); return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
} else { } else {
return null; return null;
} }
@ -145,7 +145,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
} }
/** /**
* If we've found a LOD >= 5 variant, output it to disk. * If we've found a LOD variant or callable genotype, output it to disk.
* *
* @param call an GenotypeCall object for the variant. * @param call an GenotypeCall object for the variant.
* @param sum accumulator for the reduce. * @param sum accumulator for the reduce.
@ -155,15 +155,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
public CallResult reduce(SSGenotypeCall call, CallResult sum) { public CallResult reduce(SSGenotypeCall call, CallResult sum) {
sum.nCalledBases++; sum.nCalledBases++;
// todo -- aaron, fixme -- this should be using variation() in discovery mode and genotype if not
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) { if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
if (call.getNegLog10PError() >= LOD_THRESHOLD) { double confidence = (GENOTYPE) ? call.getNegLog10PError() : call.toVariation().getNegLog10PError();
if (confidence >= LOD_THRESHOLD) {
sum.nConfidentCalls++; sum.nConfidentCalls++;
//System.out.printf("Call %s%n", call); //System.out.printf("Call %s%n", call);
sum.writer.addGenotypeCall(call); sum.writer.addGenotypeCall(call);
} else { } else
sum.nNonConfidentCalls++; sum.nNonConfidentCalls++;
}
} }
return sum; return sum;
} }

View File

@ -79,6 +79,6 @@ public interface Genotype {
* *
* @return the variant * @return the variant
*/ */
public Variant toVariation(); public Variation toVariation();
} }

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
/** /**
* @author aaron * @author aaron
* <p/> * <p/>
@ -8,7 +9,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
* <p/> * <p/>
* This class represents a variant * This class represents a variant
*/ */
public interface Variant { public interface Variation {
// the types of variants we currently allow // the types of variants we currently allow
public enum VARIANT_TYPE { public enum VARIANT_TYPE {
SNP, INDEL, DELETION, REFERENCE // though reference is not really a variant SNP, INDEL, DELETION, REFERENCE // though reference is not really a variant
@ -47,19 +48,48 @@ public interface Variant {
/** /**
* get the base representation of this Variant * get the base representation of this Variant
*
* @return a string, of ploidy * @return a string, of ploidy
*/ */
public String toBases(); public String getBases();
/** /**
* get the location that this Variant represents * get the location that this Variant represents
*
* @return a GenomeLoc * @return a GenomeLoc
*/ */
public GenomeLoc getLocation(); public GenomeLoc getLocation();
/** /**
* get the reference base(s) at this position * get the reference base(s) at this position
*
* @return the reference base or bases, as a string * @return the reference base or bases, as a string
*/ */
public String getReference(); public String getReference();
/** is our base representation heterozygous */
public boolean isHet();
/** is our base representation homozygous */
public boolean isHom();
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
public double getNegLog10PError();
/**
* are we truely a variant, given a reference
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
*/
public boolean isReference();
/**
* gets the alternate base. If this is homref, throws an UnsupportedOperationException
* @return
*/
public char getAlternateBase();
} }

View File

@ -79,43 +79,43 @@ public class SSGenotypeCallTest extends BaseTest {
@Test @Test
public void testBestVrsRefSame() { public void testBestVrsRefSame() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup(); Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call = new SSGenotypeCall(true, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(0, call.getNegLog10PError(), 0.0000001); Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
} }
@Test @Test
public void testBestVrsRef2() { public void testBestVrsRef2() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup(); Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call2 = new SSGenotypeCall(true, myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first); SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(9, call2.getNegLog10PError(), 0.0000001); Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
} }
@Test @Test
public void testBestVrsRef3() { public void testBestVrsRef3() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup(); Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call3 = new SSGenotypeCall(true, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(4, call3.getNegLog10PError(), 0.0000001); Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
} }
@Test @Test
public void testBestVrsNextSame() { public void testBestVrsNextSame() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup(); Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001); Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
} }
@Test @Test
public void testBestVrsNext2() { public void testBestVrsNext2() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup(); Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call2 = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001); Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
} }
@Test @Test
public void testBestVrsNext3() { public void testBestVrsNext3() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup(); Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call3 = new SSGenotypeCall(false, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001); Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
} }
} }