Next intermediate commit for new pool caller structure: a) Bug fixes in pool GL computation. Now, correct GL's are returned per each pool to the UG engine. Work still needs to be done in redoing interface with exact model. b) Added unit tests for new MathUtils dot product and logDotProduct functions. c) Refactorings of UnifiedGentotyperEngine since N (size of prior/posterior arrays) is no longer necessarily nSamples+1 but, in general, nSamplesPerPool*nPools+1

This commit is contained in:
Guillermo del Angel 2012-03-24 21:49:43 -04:00
parent 0a56a14d09
commit deb4586559
5 changed files with 23 additions and 10 deletions

View File

@ -955,8 +955,8 @@
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5678"/> -->
<jvmarg value="-Xdebug"/>
<jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5678"/>
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">

View File

@ -221,7 +221,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, samples.size());
// initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo();

View File

@ -115,11 +115,11 @@ public class UnifiedGenotyperEngine {
// ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
}
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) {
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples);
@ -130,7 +130,8 @@ public class UnifiedGenotyperEngine {
this.verboseWriter = verboseWriter;
this.annotationEngine = engine;
N = 2 * this.samples.size();
this.N = N;
log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
@ -703,7 +704,7 @@ public class UnifiedGenotyperEngine {
for (int i = 0; i < glmClasses.size(); i++) {
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
//System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
try {
Object args[] = new Object[]{UAC,logger};
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);

View File

@ -1050,7 +1050,7 @@ public class MathUtils {
/**
* Given two log-probability vectors, compute log of vector product of them:
* in Matlab notation, return log(10.*x'*10.^y)
* in Matlab notation, return log10(10.*x'*10.^y)
* @param x vector 1
* @param y vector 2
* @return a double representing log (dotProd(10.^x,10.^y)
@ -1065,7 +1065,7 @@ public class MathUtils {
tmpVec[k] = x[k]+y[k];
}
return sumLog10(tmpVec);
return log10sumLog10(tmpVec);

View File

@ -284,6 +284,18 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
}
@Test
public void testDotProduct() {
Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0);
Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}),-30.0);
}
@Test
public void testLogDotProduct() {
Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0);
Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0);
}
/**
* Private function used by testNormalizeFromLog10()
*/