*** We no longer have a separate model for the single-sample case. ***

For now, a single sample input will be special-cased in the EM model - but that will change when the EM model degenerates to the single sample output with a single sample as input.  For now, the EM code for multi-samples isn't finished; I'm planning on checking that in soon.

The SingleSampleIntegrationTest now uses the UnifiedCaller instead of SSG, and so should all of you.  More on that in a separate email.
Other minor cleanups added too.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1785 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-10-08 14:08:57 +00:00
parent 32128e093a
commit 04fe50cadd
8 changed files with 114 additions and 197 deletions

View File

@ -7,9 +7,9 @@ import org.apache.log4j.Logger;
import java.util.Set; import java.util.Set;
public class MultiSampleAllMAFsGenotypeCalculationModel extends GenotypeCalculationModel { public class AllMAFsGenotypeCalculationModel extends GenotypeCalculationModel {
protected MultiSampleAllMAFsGenotypeCalculationModel() {} protected AllMAFsGenotypeCalculationModel() {}
public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {

View File

@ -11,7 +11,7 @@ import java.util.*;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationModel { public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
// We need to set a limit on the EM iterations in case something flukey goes on // We need to set a limit on the EM iterations in case something flukey goes on
private static final int MAX_EM_ITERATIONS = 6; private static final int MAX_EM_ITERATIONS = 6;
@ -19,13 +19,16 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
// We consider the EM stable when the MAF doesn't change more than 1/10N // We consider the EM stable when the MAF doesn't change more than 1/10N
private static final double EM_STABILITY_METRIC = 0.1; private static final double EM_STABILITY_METRIC = 0.1;
// keep track of some metrics about our calls
private CallMetrics callsMetrics = new CallMetrics();
protected MultiSampleEMGenotypeCalculationModel() {}
protected EMGenotypeCalculationModel() {}
public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the GenotpeLikelihoods for each sample, separated by strand // keep track of the GenotypeLikelihoods for each sample, separated by strand
HashMap<String, PerSampleGenotypeLikelihoods> GLs = new HashMap<String, PerSampleGenotypeLikelihoods>(); HashMap<String, UnifiedGenotypeLikelihoods> GLs = new HashMap<String, UnifiedGenotypeLikelihoods>();
// keep track of the total counts of each base in the pileup // keep track of the total counts of each base in the pileup
int[] baseCounts = new int[4]; int[] baseCounts = new int[4];
@ -39,55 +42,76 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
for (int i = 0; i < reads.size(); i++) { for (int i = 0; i < reads.size(); i++) {
// set up the base data // get the read and offset
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
int offset = offsets.get(i); int offset = offsets.get(i);
char base = read.getReadString().charAt(offset);
// don't use bad bases // skip deletions
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); if ( offset == -1 ) {
if ( baseIndex == -1 ) {
// are there too many deletions in the pileup? // are there too many deletions in the pileup?
if ( ++deletionsInPile > maxDeletionsInPileup ) if ( ++deletionsInPile > maxDeletionsInPileup )
return false; return false;
continue; continue;
} }
// get the base; don't use bad bases
char base = read.getReadString().charAt(offset);
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
if ( baseIndex == -1 )
continue;
// create the GL holder object if this is the first time we're seeing a base for this sample // create the GL holder object if this is the first time we're seeing a base for this sample
String readGroup = read.getAttribute("RG").toString(); // can't be null because those are filtered out String readGroup = read.getAttribute("RG").toString(); // can't be null because those are filtered out
String sample = read.getHeader().getReadGroup(readGroup).getSample(); String sample = read.getHeader().getReadGroup(readGroup).getSample();
PerSampleGenotypeLikelihoods myGLs = GLs.get(sample); UnifiedGenotypeLikelihoods myGLs = GLs.get(sample);
if ( myGLs == null ) { if ( myGLs == null ) {
myGLs = new PerSampleGenotypeLikelihoods(); myGLs = new UnifiedGenotypeLikelihoods(baseModel, new DiploidGenotypePriors(), defaultPlatform, VERBOSE);
GLs.put(sample, myGLs); GLs.put(sample, myGLs);
} }
// assign the base to the appropriate strand // assign the base to the appropriate strand
myGLs.addObservedBase(base, read, offset); myGLs.add(base, read, offset);
// update the base counts // update the base counts
baseCounts[baseIndex]++; baseCounts[baseIndex]++;
totalObservedBases++; totalObservedBases++;
} }
// optimization: if all bases are ref, return // for now, we need to special-case single sample mode
if ( baseCounts[BaseUtils.simpleBaseToBaseIndex(ref)] == totalObservedBases ) if ( samples.size() == 1 ) {
return true; UnifiedGenotypeLikelihoods UGL = GLs.get(samples.iterator().next());
// if there were no good bases, the likelihoods object wouldn't exist
if ( UGL == null )
return false;
// TODO -- Do we quit if there isn't X% of total samples represented in pileup? callsMetrics.nCalledBases++;
UGL.setPriors(priors);
SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
if ( confidence >= LOD_THRESHOLD ) {
callsMetrics.nConfidentCalls++;
out.addGenotypeCall(call);
} else {
callsMetrics.nNonConfidentCalls++;
}
}
return true;
}
callsMetrics.nCalledBases++;
// Next, we need to create initial allele frequencies. // Next, we need to create initial allele frequencies.
// An intelligent guess would be the observed base ratios (plus some small number to account for sampling issues). // An intelligent guess would be the observed base ratios (plus some small number to account for sampling issues).
// TODO --- This needs to be broken out into a separate model as we might want to split up the calculation for each minor allele
double[] alleleFrequencies = new double[4]; double[] alleleFrequencies = new double[4];
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
alleleFrequencies[i] = ((double)baseCounts[i] + DiploidGenotypePriors.HUMAN_HETEROZYGOSITY) / (double)totalObservedBases; alleleFrequencies[i] = ((double)baseCounts[i] + DiploidGenotypePriors.HUMAN_HETEROZYGOSITY) / (double)totalObservedBases;
DiploidGenotypePriors AFPriors = calculateAlleleFreqBasedPriors(alleleFrequencies); DiploidGenotypePriors AFPriors = calculateAlleleFreqBasedPriors(alleleFrequencies);
for ( PerSampleGenotypeLikelihoods SGL : GLs.values() ) for ( UnifiedGenotypeLikelihoods UGL : GLs.values() )
SGL.createGLs(AFPriors); UGL.setPriors(AFPriors);
// debugging output // debugging output
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
@ -106,8 +130,8 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
// calculate the posterior-weighted allele frequencies and modify the priors accordingly // calculate the posterior-weighted allele frequencies and modify the priors accordingly
double[] newAlleleFrequencies = getPosteriorWeightedFrequencies(GLs.values()); double[] newAlleleFrequencies = getPosteriorWeightedFrequencies(GLs.values());
AFPriors = calculateAlleleFreqBasedPriors(newAlleleFrequencies); AFPriors = calculateAlleleFreqBasedPriors(newAlleleFrequencies);
for ( PerSampleGenotypeLikelihoods SGL : GLs.values() ) for ( UnifiedGenotypeLikelihoods UGL : GLs.values() )
SGL.setPriors(AFPriors); UGL.setPriors(AFPriors);
// determine whether we're stable // determine whether we're stable
double AF_delta = 0.0; double AF_delta = 0.0;
@ -127,6 +151,13 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
if (true) if (true)
throw new RuntimeException("DEBUGGING"); throw new RuntimeException("DEBUGGING");
// compute actual priors: theta / MAF
double pF = computeThetaOverMAF(ref, alleleFrequencies, GLs.size());
// the posteriors from the EM loop are the population likelihoods here
// TODO -- finish me
/* /*
ClassicGenotypeLikelihoods[] G = em_result.genotype_likelihoods; ClassicGenotypeLikelihoods[] G = em_result.genotype_likelihoods;
pD = Compute_pD(G, em_result.sample_weights); pD = Compute_pD(G, em_result.sample_weights);
@ -152,23 +183,21 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
*/ */
// apply prior of theta / MAF
// It turns out that the appropriate prior is theta, for any sample size from 1 to ~1,000
//return new SSGenotypeCall(context.getLocation(), ref,gl, pileup); //return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
return true; return true;
} }
double[] getPosteriorWeightedFrequencies(Collection<PerSampleGenotypeLikelihoods> SGLs) { double[] getPosteriorWeightedFrequencies(Collection<UnifiedGenotypeLikelihoods> UGLs) {
double[] newAlleleLikelihoods = new double[4]; double[] newAlleleLikelihoods = new double[4];
for ( PerSampleGenotypeLikelihoods SGL : SGLs ) { for ( UnifiedGenotypeLikelihoods UGL : UGLs ) {
// calculate the posterior weighted frequencies for this sample // calculate the posterior weighted frequencies for this sample
double[] personalAllelePosteriors = new double[4]; double[] personalAllelePosteriors = new double[4];
double sum = 0; double sum = 0;
for ( DiploidGenotype g : DiploidGenotype.values() ) { for ( DiploidGenotype g : DiploidGenotype.values() ) {
double posterior = Math.pow(10, SGL.totalGL.getPosterior(g)); double posterior = Math.pow(10, UGL.getGenotypeLikelihoods().getPosterior(g));
personalAllelePosteriors[BaseUtils.simpleBaseToBaseIndex(g.base1)] += posterior; personalAllelePosteriors[BaseUtils.simpleBaseToBaseIndex(g.base1)] += posterior;
personalAllelePosteriors[BaseUtils.simpleBaseToBaseIndex(g.base2)] += posterior; personalAllelePosteriors[BaseUtils.simpleBaseToBaseIndex(g.base2)] += posterior;
sum += 2.0 * posterior; sum += 2.0 * posterior;
@ -210,43 +239,34 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
return new DiploidGenotypePriors(alleleFreqPriors); return new DiploidGenotypePriors(alleleFreqPriors);
} }
private class PerSampleGenotypeLikelihoods { private double computeThetaOverMAF(char ref, double[] alleleFrequencies, int samplesAtLocus) {
double MAF;
Integer[] sortedIndexes = Utils.SortPermutation(alleleFrequencies);
if ( sortedIndexes[3] != BaseUtils.simpleBaseToBaseIndex(ref) )
MAF = alleleFrequencies[sortedIndexes[3]];
else
MAF = alleleFrequencies[sortedIndexes[2]];
GenotypeLikelihoods forwardStrandGL; int expectedChrs = (int)(2.0 * (double)samplesAtLocus * MAF);
GenotypeLikelihoods reverseStrandGL;
GenotypeLikelihoods totalGL;
public PerSampleGenotypeLikelihoods() { // TODO -- we need to use the priors from the UnifiedArgumentCollection
forwardStrandGL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, new DiploidGenotypePriors(), defaultPlatform); return DiploidGenotypePriors.HUMAN_HETEROZYGOSITY / (double)expectedChrs;
forwardStrandGL.setVerbose(VERBOSE); }
reverseStrandGL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, new DiploidGenotypePriors(), defaultPlatform);
reverseStrandGL.setVerbose(VERBOSE);
}
public void addObservedBase(char base, SAMRecord read, int offset) {
if ( !read.getReadNegativeStrandFlag() )
forwardStrandGL.add(base, read.getBaseQualities()[offset], read, offset);
else
reverseStrandGL.add(base, read.getBaseQualities()[offset], read, offset);
}
public void createGLs(DiploidGenotypePriors priors) { /**
setPriors(priors); * A class to keep track of some basic metrics about our calls
*/
private class CallMetrics {
long nConfidentCalls = 0;
long nNonConfidentCalls = 0;
long nCalledBases = 0;
// GL_i = GL+_i + GL-_i CallMetrics() {}
totalGL = GenotypeLikelihoods.combineLikelihoods(forwardStrandGL, reverseStrandGL);
totalGL.setVerbose(VERBOSE);
if ( VERBOSE )
totalGL.validate();
}
public void setPriors(DiploidGenotypePriors priors) { public String toString() {
if ( forwardStrandGL != null ) return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
forwardStrandGL.setPriors(priors); nConfidentCalls, nNonConfidentCalls, nCalledBases);
if ( reverseStrandGL != null )
reverseStrandGL.setPriors(priors);
if ( totalGL != null )
totalGL.setPriors(priors);
} }
} }
} }

View File

@ -14,9 +14,8 @@ import java.util.*;
public abstract class GenotypeCalculationModel implements Cloneable { public abstract class GenotypeCalculationModel implements Cloneable {
public enum Model { public enum Model {
SINGLE_SAMPLE, EM,
MULTI_SAMPLE_EM, ALL_MAFS
MULTI_SAMPLE_ALL_MAFS
} }
protected BaseMismatchModel baseModel; protected BaseMismatchModel baseModel;

View File

@ -58,14 +58,11 @@ public class GenotypeCalculationModelFactory {
Logger logger) { Logger logger) {
GenotypeCalculationModel gcm; GenotypeCalculationModel gcm;
switch ( UAC.genotypeModel ) { switch ( UAC.genotypeModel ) {
case SINGLE_SAMPLE: case EM:
gcm = new SingleSampleGenotypeCalculationModel(); gcm = new EMGenotypeCalculationModel();
break; break;
case MULTI_SAMPLE_EM: case ALL_MAFS:
gcm = new MultiSampleEMGenotypeCalculationModel(); gcm = new AllMAFsGenotypeCalculationModel();
break;
case MULTI_SAMPLE_ALL_MAFS:
gcm = new MultiSampleAllMAFsGenotypeCalculationModel();
break; break;
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
} }

View File

@ -78,32 +78,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
initialize(); initialize();
} }
public static GenotypeLikelihoods combineLikelihoods(GenotypeLikelihoods gl1, GenotypeLikelihoods gl2) {
if ( gl1 == null )
return gl2;
if ( gl2 == null )
return gl1;
// GL = GL_i + GL_j
GenotypeLikelihoods gl;
try {
gl = (GenotypeLikelihoods)gl1.clone();
} catch (CloneNotSupportedException e) {
throw new StingException(e.getMessage());
}
assert(gl1.likelihoods.length == gl1.posteriors.length);
assert(gl1.likelihoods.length == gl2.likelihoods.length);
assert(gl1.posteriors.length == gl2.posteriors.length);
for (int i = 0; i < gl1.likelihoods.length; i++) {
gl.likelihoods[i] += gl2.likelihoods[i];
gl.posteriors[i] += gl2.posteriors[i];
}
return gl;
}
/** /**
* Cloning of the object * Cloning of the object
* @return * @return

View File

@ -1,75 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.apache.log4j.Logger;
import java.util.Set;
public class SingleSampleGenotypeCalculationModel extends GenotypeCalculationModel {
private CallResult callsMetrics = new CallResult();
protected SingleSampleGenotypeCalculationModel() {}
public void initialize(BaseMismatchModel baseModel,
Set<String> samples,
EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform platform,
GenotypeWriter out,
Logger logger,
boolean genotypeMode,
double lod,
int maxDeletions,
boolean verbose) {
super.initialize(baseModel, samples, platform, out, logger, genotypeMode, lod, maxDeletions, verbose);
// check that this truly is a single-sample bam
if ( samples.size() > 1 )
throw new StingException("Single-sample genotype calculation model used on multi-sample bam... aborting");
}
public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
GenotypeLikelihoods gl = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
gl.setVerbose(VERBOSE);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
// check that there aren't too many deletions in the pileup
pileup.setIncludeDeletionsInPileupString(true);
if ( Utils.countOccurrences(BasicPileup.DELETION_CHAR, pileup.getBases()) > maxDeletionsInPileup )
return false;
gl.add(pileup, true);
gl.validate();
SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, gl, pileup);
callsMetrics.nCalledBases++;
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
if ( confidence >= LOD_THRESHOLD ) {
callsMetrics.nConfidentCalls++;
//System.out.printf("Call %s%n", call);
out.addGenotypeCall(call);
} else
callsMetrics.nNonConfidentCalls++;
}
return true;
}
public class CallResult {
long nConfidentCalls = 0;
long nNonConfidentCalls = 0;
long nCalledBases = 0;
CallResult() {}
public String toString() {
return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
nConfidentCalls, nNonConfidentCalls, nCalledBases);
}
}
}

View File

@ -40,8 +40,8 @@ import java.io.File;
public class UnifiedArgumentCollection { public class UnifiedArgumentCollection {
// control the various models to be used // control the various models to be used
@Argument(fullName = "genotypeModel", shortName = "gm", doc = "Genotype calculation model to employ -- SINGLE_SAMPLE and MULTI_SAMPLE_EM are the recommended choices, but it's possible to select MULTI_SAMPLE_ALL_MAFs", required = true) @Argument(fullName = "genotypeModel", shortName = "gm", doc = "Genotype calculation model to employ -- EM is the default choice; the ALL_MAFS model is experimental", required = false)
public GenotypeCalculationModel.Model genotypeModel = null; public GenotypeCalculationModel.Model genotypeModel = GenotypeCalculationModel.Model.EM;
@Argument(fullName = "baseModel", shortName = "bm", doc = "Base substitution model to employ -- EMPIRICAL is the recommended default, but it's possible to select the ONE_STATE and THREE_STATE models for comparison purposes", required = false) @Argument(fullName = "baseModel", shortName = "bm", doc = "Base substitution model to employ -- EMPIRICAL is the recommended default, but it's possible to select the ONE_STATE and THREE_STATE models for comparison purposes", required = false)
public BaseMismatchModel baseModel = BaseMismatchModel.EMPIRICAL; public BaseMismatchModel baseModel = BaseMismatchModel.EMPIRICAL;
@ -71,7 +71,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "maxCoverage", shortName = "maxCoverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false) @Argument(fullName = "maxCoverage", shortName = "maxCoverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
public Integer MAX_READS_IN_PILEUP = 10000; public Integer MAX_READS_IN_PILEUP = 10000;
//@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false) //@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false)
//public boolean disableCache = false; //public boolean disableCache = false;
} }

View File

@ -5,21 +5,20 @@ import org.junit.Test;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
public class SingleSampleGenotyperIntegrationTest extends WalkerTest { public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
public static String baseTestString() { public static String baseTestString() {
return "-T SingleSampleGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s"; return "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s";
} }
public static String testGeliLod5() { public static String testGeliLod5() {
return baseTestString() + " --variant_output_format GELI -lod 5"; return baseTestString() + " --variant_output_format GELI -lod 5";
} }
private static String OneMb1StateMD5 = "ca300adf2442d8874b4f13819547a7fb"; private static String OneMb1StateMD5 = "cba4436066b5d88a461cc3eba74ad944";
private static String OneMb3StateMD5 = "45caee5f71a30c7175c7389b594369b1"; private static String OneMb3StateMD5 = "799dde914dd4afbffca9502b7f284780";
private static String OneMbEmpiricalMD5 = "c8602b745b5c024f09938a9f87f923cf"; private static String OneMbEmpiricalMD5 = "2b99b3c667c8ac94e8268b17e6979073";
// private static String oneMbMD5(BaseMismatchModel m) { // private static String oneMbMD5(BaseMismatchModel m) {
// switch (m) { // switch (m) {
@ -43,14 +42,14 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testMultiTechnologies() { public void testMultiTechnologies() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T SingleSampleGenotyper" + "-T UnifiedGenotyper" +
" -R /broad/1KG/reference/human_b36_both.fasta" + " -R /broad/1KG/reference/human_b36_both.fasta" +
" -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" + " -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
" -varout %s" + " -varout %s" +
" -L 1:10,000,000-10,100,000" + " -L 1:10,000,000-10,100,000" +
" -m empirical", " -bm empirical",
1, 1,
Arrays.asList("09f7ea954a44e68b80726147aee10944")); Arrays.asList("0a923f676111b9bc27ccb1a9b97eafcc"));
executeTest(String.format("testMultiTechnologies"), spec); executeTest(String.format("testMultiTechnologies"), spec);
} }
@ -60,6 +59,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
// testing the cache // testing the cache
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
/*
@Test @Test
public void testCache() { public void testCache() {
for ( BaseMismatchModel model : BaseMismatchModel.values() ) { for ( BaseMismatchModel model : BaseMismatchModel.values() ) {
@ -70,11 +70,12 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
List<String> withoutCache = executeTest("empirical1MbTest", withoutCacheSpec ).getSecond(); List<String> withoutCache = executeTest("empirical1MbTest", withoutCacheSpec ).getSecond();
WalkerTest.WalkerTestSpec withCacheSpec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec withCacheSpec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m " + model.toString(), 1, testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm " + model.toString(), 1,
withoutCache); withoutCache);
executeTest(String.format("testCache[%s]", model), withCacheSpec ); executeTest(String.format("testCache[%s]", model), withCacheSpec );
} }
} }
*/
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
@ -84,8 +85,8 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void genotypeTest() { public void genotypeTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1, testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1,
Arrays.asList("b4c6d84ea14f9c34b04b799d3edd199a")); Arrays.asList("436c0e3365f61bf1d06eb630c025e51b"));
executeTest("genotypeTest", spec); executeTest("genotypeTest", spec);
} }
@ -97,14 +98,14 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void oneState100bpTest() { public void oneState100bpTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,000,100 -m one_state", 1, Arrays.asList("3cd402d889c015be4a318123468f4262")); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,000,100 -bm one_state", 1, Arrays.asList("3cd402d889c015be4a318123468f4262"));
executeTest("oneState100bpTest", spec); executeTest("oneState100bpTest", spec);
} }
@Test @Test
public void oneState1MbTest() { public void oneState1MbTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m one_state", testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm one_state",
1, Arrays.asList(OneMb1StateMD5)); 1, Arrays.asList(OneMb1StateMD5));
executeTest("oneState1MbTest", spec); executeTest("oneState1MbTest", spec);
} }
@ -112,7 +113,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void threeState1MbTest() { public void threeState1MbTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m three_state", 1, testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm three_state", 1,
Arrays.asList(OneMb3StateMD5)); Arrays.asList(OneMb3StateMD5));
executeTest("threeState1MbTest", spec); executeTest("threeState1MbTest", spec);
} }
@ -120,7 +121,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void empirical1MbTest() { public void empirical1MbTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m empirical", 1, testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm empirical", 1,
Arrays.asList(OneMbEmpiricalMD5)); Arrays.asList(OneMbEmpiricalMD5));
executeTest("empirical1MbTest", spec); executeTest("empirical1MbTest", spec);
} }
@ -144,12 +145,12 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testLOD() { public void testLOD() {
HashMap<Double, String> e = new HashMap<Double, String>(); HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 10.0, "3c6c40551c42f693736dab5e29f7a76c" ); e.put( 10.0, "3ec7815482112e0b9b4487ec69a52b67" );
e.put( 3.0, "5c82a81f5919b29058f9ca031c00b7ef" ); e.put( 3.0, "20da42fdce2e1c97d9c8d4935d31125d" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) { for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseTestString() + " --variant_output_format GELI -L 1:10,000,000-11,000,000 -m EMPIRICAL -lod " + entry.getKey(), 1, baseTestString() + " --variant_output_format GELI -L 1:10,000,000-11,000,000 -bm EMPIRICAL -lod " + entry.getKey(), 1,
Arrays.asList(entry.getValue())); Arrays.asList(entry.getValue()));
executeTest("testLOD", spec); executeTest("testLOD", spec);
} }
@ -163,13 +164,13 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testHeterozyosity() { public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>(); HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "c6fc697d31cfda563145138126561c77" ); e.put( 0.01, "e7494e6a62bd91ca02537c327a104395" );
e.put( 0.0001, "1a1466b7599c60fad75d0513a0c8496e" ); e.put( 0.0001, "1d51c711353d0db5b54dd0d2a7899c49" );
e.put( 1.0 / 1850, "5758dbcdea158fb053fcea22b06befe8" ); e.put( 1.0 / 1850, "873f558a2c07ec40635e35d275b12d69" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) { for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m EMPIRICAL --heterozygosity " + entry.getKey(), 1, testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm EMPIRICAL --heterozygosity " + entry.getKey(), 1,
Arrays.asList(entry.getValue())); Arrays.asList(entry.getValue()));
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
} }