changes to the variation class, updates to SSG, updated tests based on changes to the SSGenotypeCall, and added the ability to run a single integration test from using the build script.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1577 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c988205884
commit
5a64a80ab5
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
<property name="source.dir" value="java/src" />
|
<property name="source.dir" value="java/src" />
|
||||||
<property name="single" value="*Test" />
|
<property name="single" value="*Test" />
|
||||||
|
<property name="singleintegration" value="*IntegrationTest" />
|
||||||
<property name="dist" value="dist" />
|
<property name="dist" value="dist" />
|
||||||
|
|
||||||
<!-- should our junit test output go to a file or the screen?
|
<!-- should our junit test output go to a file or the screen?
|
||||||
|
|
@ -271,7 +272,7 @@
|
||||||
|
|
||||||
<batchtest fork="yes" todir="${report}">
|
<batchtest fork="yes" todir="${report}">
|
||||||
<fileset dir="${test.classes}">
|
<fileset dir="${test.classes}">
|
||||||
<include name="**/*IntegrationTest.class"/>
|
<include name="**/${singleintegration}.class"/>
|
||||||
</fileset>
|
</fileset>
|
||||||
</batchtest>
|
</batchtest>
|
||||||
</junit>
|
</junit>
|
||||||
|
|
|
||||||
|
|
@ -29,23 +29,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
// if this is null, we were constructed with the intention that we'd represent the best genotype
|
// if this is null, we were constructed with the intention that we'd represent the best genotype
|
||||||
private DiploidGenotype mGenotype = null;
|
private DiploidGenotype mGenotype = null;
|
||||||
|
|
||||||
// which genotype to compare to; if we're in discovery mode it's the ref allele, otherwise it's the next best
|
// the reference genotype and the next best genotype, lazy loaded
|
||||||
private DiploidGenotype mCompareTo = null;
|
private DiploidGenotype mRefGenotype = null;
|
||||||
|
private DiploidGenotype mNextGenotype = null;
|
||||||
|
|
||||||
// are we best vrs ref or best vrs next - for internal consumption only
|
// are we best vrs ref or best vrs next - for internal consumption only
|
||||||
private final boolean mBestVrsRef;
|
//private final boolean mBestVrsRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||||
*
|
*
|
||||||
* @param discovery are we representing the best vrs next or best vrs ref
|
* @param location the location we're working with
|
||||||
* @param location the location we're working with
|
* @param refBase the ref base
|
||||||
* @param refBase the ref base
|
* @param gtlh the genotype likelihoods object
|
||||||
* @param gtlh the genotype likelihoods object
|
* @param pileup the pile-up of reads at the specified locus
|
||||||
* @param pileup the pile-up of reads at the specified locus
|
|
||||||
*/
|
*/
|
||||||
public SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
||||||
mBestVrsRef = discovery;
|
|
||||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
||||||
mGenotypeLikelihoods = gtlh;
|
mGenotypeLikelihoods = gtlh;
|
||||||
mLocation = location;
|
mLocation = location;
|
||||||
|
|
@ -55,14 +54,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
/**
|
/**
|
||||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||||
*
|
*
|
||||||
* @param discovery are we representing the best vrs next or best vrs ref
|
* @param location the location we're working with
|
||||||
* @param location the location we're working with
|
* @param refBase the ref base
|
||||||
* @param refBase the ref base
|
* @param gtlh the genotype likelihoods object
|
||||||
* @param gtlh the genotype likelihoods object
|
* @param pileup the pile-up of reads at the specified locus
|
||||||
* @param pileup the pile-up of reads at the specified locus
|
|
||||||
*/
|
*/
|
||||||
SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
||||||
mBestVrsRef = discovery;
|
|
||||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
||||||
mGenotypeLikelihoods = gtlh;
|
mGenotypeLikelihoods = gtlh;
|
||||||
mLocation = location;
|
mLocation = location;
|
||||||
|
|
@ -93,7 +90,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
public String toString() {
|
public String toString() {
|
||||||
lazyEval();
|
lazyEval();
|
||||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
|
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
|
||||||
getLocation(), mGenotype, mCompareTo, mRefBase, mPileup.getReads().size(),
|
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
|
||||||
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -105,13 +102,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
}
|
}
|
||||||
|
|
||||||
// our comparison
|
// our comparison
|
||||||
if (mCompareTo == null) {
|
if (mRefGenotype == null) {
|
||||||
if (this.mBestVrsRef) {
|
mRefGenotype = DiploidGenotype.valueOf(Utils.dupString(this.getReference(), 2));
|
||||||
mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2));
|
}
|
||||||
} else {
|
if (mNextGenotype == null) {
|
||||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||||
mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -123,15 +119,13 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public double getNegLog10PError() {
|
public double getNegLog10PError() {
|
||||||
getBestGenotype();
|
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
|
||||||
getAltGenotype();
|
|
||||||
return Math.abs(mGenotypeLikelihoods.getPosterior(mGenotype) - mGenotypeLikelihoods.getPosterior(mCompareTo));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the best genotype
|
* get the best genotype
|
||||||
*/
|
*/
|
||||||
public DiploidGenotype getBestGenotype() {
|
private DiploidGenotype getBestGenotype() {
|
||||||
lazyEval();
|
lazyEval();
|
||||||
return mGenotype;
|
return mGenotype;
|
||||||
}
|
}
|
||||||
|
|
@ -139,9 +133,17 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
/**
|
/**
|
||||||
* get the alternate genotype
|
* get the alternate genotype
|
||||||
*/
|
*/
|
||||||
public DiploidGenotype getAltGenotype() {
|
private DiploidGenotype getNextBest() {
|
||||||
lazyEval();
|
lazyEval();
|
||||||
return mCompareTo;
|
return mNextGenotype;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the alternate genotype
|
||||||
|
*/
|
||||||
|
private DiploidGenotype getRefGenotype() {
|
||||||
|
lazyEval();
|
||||||
|
return mRefGenotype;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -209,12 +211,11 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
* given the reference, are we a variant? (non-ref)
|
* given the reference, are we a variant? (non-ref)
|
||||||
*
|
*
|
||||||
* @param ref the reference base or bases
|
* @param ref the reference base or bases
|
||||||
*
|
|
||||||
* @return true if we're a variant
|
* @return true if we're a variant
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isVariant(char ref) {
|
public boolean isVariant(char ref) {
|
||||||
return !Utils.dupString(this.getReference(),2).equals(getBestGenotype().toString());
|
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -222,8 +223,9 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public Variant toVariation() {
|
public Variation toVariation() {
|
||||||
return null; // the next step is to implement the variant system
|
double bestRef = Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getRefGenotype()));
|
||||||
|
return new BasicVariation(this.getBases(), this.getReference(), 0, this.mLocation, bestRef);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -268,7 +270,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
* @return an array in lexigraphical order of the likelihoods
|
* @return an array in lexigraphical order of the likelihoods
|
||||||
*/
|
*/
|
||||||
public Genotype getGenotype(DiploidGenotype x) {
|
public Genotype getGenotype(DiploidGenotype x) {
|
||||||
return new SSGenotypeCall(mBestVrsRef,mLocation,mRefBase,mGenotypeLikelihoods,mPileup,x);
|
return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -104,7 +104,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
gl.add(pileup, true);
|
gl.add(pileup, true);
|
||||||
gl.validate();
|
gl.validate();
|
||||||
|
|
||||||
return new SSGenotypeCall(!GENOTYPE, context.getLocation(), ref,gl, pileup);
|
return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -145,7 +145,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If we've found a LOD >= 5 variant, output it to disk.
|
* If we've found a LOD variant or callable genotype, output it to disk.
|
||||||
*
|
*
|
||||||
* @param call an GenotypeCall object for the variant.
|
* @param call an GenotypeCall object for the variant.
|
||||||
* @param sum accumulator for the reduce.
|
* @param sum accumulator for the reduce.
|
||||||
|
|
@ -155,15 +155,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
public CallResult reduce(SSGenotypeCall call, CallResult sum) {
|
public CallResult reduce(SSGenotypeCall call, CallResult sum) {
|
||||||
sum.nCalledBases++;
|
sum.nCalledBases++;
|
||||||
|
|
||||||
// todo -- aaron, fixme -- this should be using variation() in discovery mode and genotype if not
|
|
||||||
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
|
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
|
||||||
if (call.getNegLog10PError() >= LOD_THRESHOLD) {
|
double confidence = (GENOTYPE) ? call.getNegLog10PError() : call.toVariation().getNegLog10PError();
|
||||||
|
if (confidence >= LOD_THRESHOLD) {
|
||||||
sum.nConfidentCalls++;
|
sum.nConfidentCalls++;
|
||||||
//System.out.printf("Call %s%n", call);
|
//System.out.printf("Call %s%n", call);
|
||||||
sum.writer.addGenotypeCall(call);
|
sum.writer.addGenotypeCall(call);
|
||||||
} else {
|
} else
|
||||||
sum.nNonConfidentCalls++;
|
sum.nNonConfidentCalls++;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,6 @@ public interface Genotype {
|
||||||
*
|
*
|
||||||
* @return the variant
|
* @return the variant
|
||||||
*/
|
*/
|
||||||
public Variant toVariation();
|
public Variation toVariation();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
* <p/>
|
* <p/>
|
||||||
|
|
@ -8,7 +9,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
* <p/>
|
* <p/>
|
||||||
* This class represents a variant
|
* This class represents a variant
|
||||||
*/
|
*/
|
||||||
public interface Variant {
|
public interface Variation {
|
||||||
// the types of variants we currently allow
|
// the types of variants we currently allow
|
||||||
public enum VARIANT_TYPE {
|
public enum VARIANT_TYPE {
|
||||||
SNP, INDEL, DELETION, REFERENCE // though reference is not really a variant
|
SNP, INDEL, DELETION, REFERENCE // though reference is not really a variant
|
||||||
|
|
@ -47,19 +48,48 @@ public interface Variant {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the base representation of this Variant
|
* get the base representation of this Variant
|
||||||
|
*
|
||||||
* @return a string, of ploidy
|
* @return a string, of ploidy
|
||||||
*/
|
*/
|
||||||
public String toBases();
|
public String getBases();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the location that this Variant represents
|
* get the location that this Variant represents
|
||||||
|
*
|
||||||
* @return a GenomeLoc
|
* @return a GenomeLoc
|
||||||
*/
|
*/
|
||||||
public GenomeLoc getLocation();
|
public GenomeLoc getLocation();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the reference base(s) at this position
|
* get the reference base(s) at this position
|
||||||
|
*
|
||||||
* @return the reference base or bases, as a string
|
* @return the reference base or bases, as a string
|
||||||
*/
|
*/
|
||||||
public String getReference();
|
public String getReference();
|
||||||
|
|
||||||
|
/** is our base representation heterozygous */
|
||||||
|
public boolean isHet();
|
||||||
|
|
||||||
|
/** is our base representation homozygous */
|
||||||
|
public boolean isHom();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the -1 * (log 10 of the error value)
|
||||||
|
*
|
||||||
|
* @return the log based error estimate
|
||||||
|
*/
|
||||||
|
public double getNegLog10PError();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* are we truely a variant, given a reference
|
||||||
|
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
|
||||||
|
*/
|
||||||
|
public boolean isReference();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate base. If this is homref, throws an UnsupportedOperationException
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public char getAlternateBase();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -79,43 +79,43 @@ public class SSGenotypeCallTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsRefSame() {
|
public void testBestVrsRefSame() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call = new SSGenotypeCall(true, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(0, call.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsRef2() {
|
public void testBestVrsRef2() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call2 = new SSGenotypeCall(true, myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
|
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(9, call2.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsRef3() {
|
public void testBestVrsRef3() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call3 = new SSGenotypeCall(true, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(4, call3.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsNextSame() {
|
public void testBestVrsNextSame() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsNext2() {
|
public void testBestVrsNext2() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call2 = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsNext3() {
|
public void testBestVrsNext3() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call3 = new SSGenotypeCall(false, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue