changes to the variation class, updates to SSG, updated tests based on changes to the SSGenotypeCall, and added the ability to run a single integration test using the build script.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1577 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c988205884
commit
5a64a80ab5
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
<property name="source.dir" value="java/src" />
|
||||
<property name="single" value="*Test" />
|
||||
<property name="singleintegration" value="*IntegrationTest" />
|
||||
<property name="dist" value="dist" />
|
||||
|
||||
<!-- should our junit test output go to a file or the screen?
|
||||
|
|
@ -271,7 +272,7 @@
|
|||
|
||||
<batchtest fork="yes" todir="${report}">
|
||||
<fileset dir="${test.classes}">
|
||||
<include name="**/*IntegrationTest.class"/>
|
||||
<include name="**/${singleintegration}.class"/>
|
||||
</fileset>
|
||||
</batchtest>
|
||||
</junit>
|
||||
|
|
|
|||
|
|
@ -29,23 +29,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
// if this is null, we were constructed with the intention that we'd represent the best genotype
|
||||
private DiploidGenotype mGenotype = null;
|
||||
|
||||
// which genotype to compare to; if we're in discovery mode it's the ref allele, otherwise it's the next best
|
||||
private DiploidGenotype mCompareTo = null;
|
||||
// the reference genotype and the next best genotype, lazy loaded
|
||||
private DiploidGenotype mRefGenotype = null;
|
||||
private DiploidGenotype mNextGenotype = null;
|
||||
|
||||
// are we best vs. ref or best vs. next - for internal consumption only
|
||||
private final boolean mBestVrsRef;
|
||||
//private final boolean mBestVrsRef;
|
||||
|
||||
/**
|
||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||
*
|
||||
* @param discovery are we representing the best vrs next or best vrs ref
|
||||
* @param location the location we're working with
|
||||
* @param refBase the ref base
|
||||
* @param gtlh the genotype likelihoods object
|
||||
* @param pileup the pile-up of reads at the specified locus
|
||||
* @param location the location we're working with
|
||||
* @param refBase the ref base
|
||||
* @param gtlh the genotype likelihoods object
|
||||
* @param pileup the pile-up of reads at the specified locus
|
||||
*/
|
||||
public SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
||||
mBestVrsRef = discovery;
|
||||
public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
||||
mGenotypeLikelihoods = gtlh;
|
||||
mLocation = location;
|
||||
|
|
@ -55,14 +54,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
/**
|
||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||
*
|
||||
* @param discovery are we representing the best vrs next or best vrs ref
|
||||
* @param location the location we're working with
|
||||
* @param refBase the ref base
|
||||
* @param gtlh the genotype likelihoods object
|
||||
* @param pileup the pile-up of reads at the specified locus
|
||||
* @param location the location we're working with
|
||||
* @param refBase the ref base
|
||||
* @param gtlh the genotype likelihoods object
|
||||
* @param pileup the pile-up of reads at the specified locus
|
||||
*/
|
||||
SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
||||
mBestVrsRef = discovery;
|
||||
SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
||||
mGenotypeLikelihoods = gtlh;
|
||||
mLocation = location;
|
||||
|
|
@ -93,7 +90,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
public String toString() {
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
|
||||
getLocation(), mGenotype, mCompareTo, mRefBase, mPileup.getReads().size(),
|
||||
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
|
||||
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
||||
}
|
||||
|
||||
|
|
@ -105,13 +102,12 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
}
|
||||
|
||||
// our comparison
|
||||
if (mCompareTo == null) {
|
||||
if (this.mBestVrsRef) {
|
||||
mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2));
|
||||
} else {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
}
|
||||
if (mRefGenotype == null) {
|
||||
mRefGenotype = DiploidGenotype.valueOf(Utils.dupString(this.getReference(), 2));
|
||||
}
|
||||
if (mNextGenotype == null) {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -123,15 +119,13 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
getBestGenotype();
|
||||
getAltGenotype();
|
||||
return Math.abs(mGenotypeLikelihoods.getPosterior(mGenotype) - mGenotypeLikelihoods.getPosterior(mCompareTo));
|
||||
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
|
||||
}
|
||||
|
||||
/**
|
||||
* get the best genotype
|
||||
*/
|
||||
public DiploidGenotype getBestGenotype() {
|
||||
private DiploidGenotype getBestGenotype() {
|
||||
lazyEval();
|
||||
return mGenotype;
|
||||
}
|
||||
|
|
@ -139,9 +133,17 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
/**
|
||||
* get the alternate genotype
|
||||
*/
|
||||
public DiploidGenotype getAltGenotype() {
|
||||
private DiploidGenotype getNextBest() {
|
||||
lazyEval();
|
||||
return mCompareTo;
|
||||
return mNextGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference genotype
|
||||
*/
|
||||
private DiploidGenotype getRefGenotype() {
|
||||
lazyEval();
|
||||
return mRefGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -209,12 +211,11 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariant(char ref) {
|
||||
return !Utils.dupString(this.getReference(),2).equals(getBestGenotype().toString());
|
||||
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -222,8 +223,9 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
public Variant toVariation() {
|
||||
return null; // the next step is to implement the variant system
|
||||
public Variation toVariation() {
|
||||
double bestRef = Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getRefGenotype()));
|
||||
return new BasicVariation(this.getBases(), this.getReference(), 0, this.mLocation, bestRef);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -268,7 +270,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
* @return an array in lexicographical order of the likelihoods
|
||||
*/
|
||||
public Genotype getGenotype(DiploidGenotype x) {
|
||||
return new SSGenotypeCall(mBestVrsRef,mLocation,mRefBase,mGenotypeLikelihoods,mPileup,x);
|
||||
return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
|||
gl.add(pileup, true);
|
||||
gl.validate();
|
||||
|
||||
return new SSGenotypeCall(!GENOTYPE, context.getLocation(), ref,gl, pileup);
|
||||
return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -145,7 +145,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
|||
}
|
||||
|
||||
/**
|
||||
* If we've found a LOD >= 5 variant, output it to disk.
|
||||
* If we've found a LOD variant or callable genotype, output it to disk.
|
||||
*
|
||||
* @param call a GenotypeCall object for the variant.
|
||||
* @param sum accumulator for the reduce.
|
||||
|
|
@ -155,15 +155,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
|||
public CallResult reduce(SSGenotypeCall call, CallResult sum) {
|
||||
sum.nCalledBases++;
|
||||
|
||||
// todo -- aaron, fixme -- this should be using variation() in discovery mode and genotype if not
|
||||
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
|
||||
if (call.getNegLog10PError() >= LOD_THRESHOLD) {
|
||||
double confidence = (GENOTYPE) ? call.getNegLog10PError() : call.toVariation().getNegLog10PError();
|
||||
if (confidence >= LOD_THRESHOLD) {
|
||||
sum.nConfidentCalls++;
|
||||
//System.out.printf("Call %s%n", call);
|
||||
sum.writer.addGenotypeCall(call);
|
||||
} else {
|
||||
} else
|
||||
sum.nNonConfidentCalls++;
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,6 +79,6 @@ public interface Genotype {
|
|||
*
|
||||
* @return the variant
|
||||
*/
|
||||
public Variant toVariation();
|
||||
public Variation toVariation();
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
|
|
@ -8,7 +9,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
* <p/>
|
||||
* This class represents a variant
|
||||
*/
|
||||
public interface Variant {
|
||||
public interface Variation {
|
||||
// the types of variants we currently allow
|
||||
public enum VARIANT_TYPE {
|
||||
SNP, INDEL, DELETION, REFERENCE // though reference is not really a variant
|
||||
|
|
@ -47,19 +48,48 @@ public interface Variant {
|
|||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
public String toBases();
|
||||
public String getBases();
|
||||
|
||||
/**
|
||||
* get the location that this Variant represents
|
||||
*
|
||||
* @return a GenomeLoc
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference();
|
||||
|
||||
/** is our base representation heterozygous */
|
||||
public boolean isHet();
|
||||
|
||||
/** is our base representation homozygous */
|
||||
public boolean isHom();
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
public double getNegLog10PError();
|
||||
|
||||
/**
|
||||
* are we truly a variant, given a reference
|
||||
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
|
||||
*/
|
||||
public boolean isReference();
|
||||
|
||||
/**
|
||||
* gets the alternate base. If this is homref, throws an UnsupportedOperationException
|
||||
* @return
|
||||
*/
|
||||
public char getAlternateBase();
|
||||
|
||||
}
|
||||
|
|
@ -79,43 +79,43 @@ public class SSGenotypeCallTest extends BaseTest {
|
|||
@Test
|
||||
public void testBestVrsRefSame() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
SSGenotypeCall call = new SSGenotypeCall(true, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(0, call.getNegLog10PError(), 0.0000001);
|
||||
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsRef2() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
SSGenotypeCall call2 = new SSGenotypeCall(true, myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(9, call2.getNegLog10PError(), 0.0000001);
|
||||
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsRef3() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
SSGenotypeCall call3 = new SSGenotypeCall(true, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(4, call3.getNegLog10PError(), 0.0000001);
|
||||
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testBestVrsNextSame() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
SSGenotypeCall call = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsNext2() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
SSGenotypeCall call2 = new SSGenotypeCall(false, myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsNext3() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
SSGenotypeCall call3 = new SSGenotypeCall(false, myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue