*** We no longer have a separate model for the single-sample case. ***
For now, a single sample input will be special-cased in the EM model - but that will change when the EM model degenerates to the single sample output with a single sample as input. For now, the EM code for multi-samples isn't finished; I'm planning on checking that in soon. The SingleSampleIntegrationTest now uses the UnifiedCaller instead of SSG, and so should all of you. More on that in a separate email. Other minor cleanups added too. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1785 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
32128e093a
commit
04fe50cadd
|
|
@ -7,9 +7,9 @@ import org.apache.log4j.Logger;
|
|||
|
||||
import java.util.Set;
|
||||
|
||||
public class MultiSampleAllMAFsGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||
public class AllMAFsGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||
|
||||
protected MultiSampleAllMAFsGenotypeCalculationModel() {}
|
||||
protected AllMAFsGenotypeCalculationModel() {}
|
||||
|
||||
public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
|
||||
|
|
@ -11,7 +11,7 @@ import java.util.*;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||
public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||
|
||||
// We need to set a limit on the EM iterations in case something flukey goes on
|
||||
private static final int MAX_EM_ITERATIONS = 6;
|
||||
|
|
@ -19,13 +19,16 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
// We consider the EM stable when the MAF doesn't change more than 1/10N
|
||||
private static final double EM_STABILITY_METRIC = 0.1;
|
||||
|
||||
// keep track of some metrics about our calls
|
||||
private CallMetrics callsMetrics = new CallMetrics();
|
||||
|
||||
protected MultiSampleEMGenotypeCalculationModel() {}
|
||||
|
||||
protected EMGenotypeCalculationModel() {}
|
||||
|
||||
public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
|
||||
// keep track of the GenotpeLikelihoods for each sample, separated by strand
|
||||
HashMap<String, PerSampleGenotypeLikelihoods> GLs = new HashMap<String, PerSampleGenotypeLikelihoods>();
|
||||
// keep track of the GenotypeLikelihoods for each sample, separated by strand
|
||||
HashMap<String, UnifiedGenotypeLikelihoods> GLs = new HashMap<String, UnifiedGenotypeLikelihoods>();
|
||||
|
||||
// keep track of the total counts of each base in the pileup
|
||||
int[] baseCounts = new int[4];
|
||||
|
|
@ -39,55 +42,76 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
|
||||
// set up the base data
|
||||
// get the read and offset
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
char base = read.getReadString().charAt(offset);
|
||||
|
||||
// don't use bad bases
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
if ( baseIndex == -1 ) {
|
||||
// skip deletions
|
||||
if ( offset == -1 ) {
|
||||
// are there too many deletions in the pileup?
|
||||
if ( ++deletionsInPile > maxDeletionsInPileup )
|
||||
return false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// get the base; don't use bad bases
|
||||
char base = read.getReadString().charAt(offset);
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
if ( baseIndex == -1 )
|
||||
continue;
|
||||
|
||||
// create the GL holder object if this is the first time we're seeing a base for this sample
|
||||
String readGroup = read.getAttribute("RG").toString(); // can't be null because those are filtered out
|
||||
String sample = read.getHeader().getReadGroup(readGroup).getSample();
|
||||
PerSampleGenotypeLikelihoods myGLs = GLs.get(sample);
|
||||
UnifiedGenotypeLikelihoods myGLs = GLs.get(sample);
|
||||
if ( myGLs == null ) {
|
||||
myGLs = new PerSampleGenotypeLikelihoods();
|
||||
myGLs = new UnifiedGenotypeLikelihoods(baseModel, new DiploidGenotypePriors(), defaultPlatform, VERBOSE);
|
||||
GLs.put(sample, myGLs);
|
||||
}
|
||||
|
||||
// assign the base to the appropriate strand
|
||||
myGLs.addObservedBase(base, read, offset);
|
||||
myGLs.add(base, read, offset);
|
||||
|
||||
// update the base counts
|
||||
baseCounts[baseIndex]++;
|
||||
totalObservedBases++;
|
||||
}
|
||||
|
||||
// optimization: if all bases are ref, return
|
||||
if ( baseCounts[BaseUtils.simpleBaseToBaseIndex(ref)] == totalObservedBases )
|
||||
return true;
|
||||
// for now, we need to special-case single sample mode
|
||||
if ( samples.size() == 1 ) {
|
||||
UnifiedGenotypeLikelihoods UGL = GLs.get(samples.iterator().next());
|
||||
// if there were no good bases, the likelihoods object wouldn't exist
|
||||
if ( UGL == null )
|
||||
return false;
|
||||
|
||||
// TODO -- Do we quit if there isn't X% of total samples represented in pileup?
|
||||
callsMetrics.nCalledBases++;
|
||||
UGL.setPriors(priors);
|
||||
SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
|
||||
|
||||
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
|
||||
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
||||
if ( confidence >= LOD_THRESHOLD ) {
|
||||
callsMetrics.nConfidentCalls++;
|
||||
out.addGenotypeCall(call);
|
||||
} else {
|
||||
callsMetrics.nNonConfidentCalls++;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
callsMetrics.nCalledBases++;
|
||||
|
||||
// Next, we need to create initial allele frequencies.
|
||||
// An intelligent guess would be the observed base ratios (plus some small number to account for sampling issues).
|
||||
|
||||
// TODO --- This needs to be broken out into a separate model as we might want to split up the calculation for each minor allele
|
||||
|
||||
double[] alleleFrequencies = new double[4];
|
||||
for (int i = 0; i < 4; i++)
|
||||
alleleFrequencies[i] = ((double)baseCounts[i] + DiploidGenotypePriors.HUMAN_HETEROZYGOSITY) / (double)totalObservedBases;
|
||||
DiploidGenotypePriors AFPriors = calculateAlleleFreqBasedPriors(alleleFrequencies);
|
||||
|
||||
for ( PerSampleGenotypeLikelihoods SGL : GLs.values() )
|
||||
SGL.createGLs(AFPriors);
|
||||
for ( UnifiedGenotypeLikelihoods UGL : GLs.values() )
|
||||
UGL.setPriors(AFPriors);
|
||||
|
||||
// debugging output
|
||||
for (int i = 0; i < 4; i++)
|
||||
|
|
@ -106,8 +130,8 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
// calculate the posterior-weighted allele frequencies and modify the priors accordingly
|
||||
double[] newAlleleFrequencies = getPosteriorWeightedFrequencies(GLs.values());
|
||||
AFPriors = calculateAlleleFreqBasedPriors(newAlleleFrequencies);
|
||||
for ( PerSampleGenotypeLikelihoods SGL : GLs.values() )
|
||||
SGL.setPriors(AFPriors);
|
||||
for ( UnifiedGenotypeLikelihoods UGL : GLs.values() )
|
||||
UGL.setPriors(AFPriors);
|
||||
|
||||
// determine whether we're stable
|
||||
double AF_delta = 0.0;
|
||||
|
|
@ -127,6 +151,13 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
if (true)
|
||||
throw new RuntimeException("DEBUGGING");
|
||||
|
||||
// compute actual priors: theta / MAF
|
||||
double pF = computeThetaOverMAF(ref, alleleFrequencies, GLs.size());
|
||||
|
||||
// the posteriors from the EM loop are the population likelihoods here
|
||||
// TODO -- finish me
|
||||
|
||||
|
||||
/*
|
||||
ClassicGenotypeLikelihoods[] G = em_result.genotype_likelihoods;
|
||||
pD = Compute_pD(G, em_result.sample_weights);
|
||||
|
|
@ -152,23 +183,21 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
|
||||
*/
|
||||
|
||||
// apply prior of theta / MAF
|
||||
// It turns out that the appropriate prior is theta, for any sample size from 1 to ~1,000
|
||||
//return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
double[] getPosteriorWeightedFrequencies(Collection<PerSampleGenotypeLikelihoods> SGLs) {
|
||||
double[] getPosteriorWeightedFrequencies(Collection<UnifiedGenotypeLikelihoods> UGLs) {
|
||||
double[] newAlleleLikelihoods = new double[4];
|
||||
|
||||
for ( PerSampleGenotypeLikelihoods SGL : SGLs ) {
|
||||
for ( UnifiedGenotypeLikelihoods UGL : UGLs ) {
|
||||
|
||||
// calculate the posterior weighted frequencies for this sample
|
||||
double[] personalAllelePosteriors = new double[4];
|
||||
double sum = 0;
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
double posterior = Math.pow(10, SGL.totalGL.getPosterior(g));
|
||||
double posterior = Math.pow(10, UGL.getGenotypeLikelihoods().getPosterior(g));
|
||||
personalAllelePosteriors[BaseUtils.simpleBaseToBaseIndex(g.base1)] += posterior;
|
||||
personalAllelePosteriors[BaseUtils.simpleBaseToBaseIndex(g.base2)] += posterior;
|
||||
sum += 2.0 * posterior;
|
||||
|
|
@ -210,43 +239,34 @@ public class MultiSampleEMGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
return new DiploidGenotypePriors(alleleFreqPriors);
|
||||
}
|
||||
|
||||
private class PerSampleGenotypeLikelihoods {
|
||||
private double computeThetaOverMAF(char ref, double[] alleleFrequencies, int samplesAtLocus) {
|
||||
double MAF;
|
||||
Integer[] sortedIndexes = Utils.SortPermutation(alleleFrequencies);
|
||||
if ( sortedIndexes[3] != BaseUtils.simpleBaseToBaseIndex(ref) )
|
||||
MAF = alleleFrequencies[sortedIndexes[3]];
|
||||
else
|
||||
MAF = alleleFrequencies[sortedIndexes[2]];
|
||||
|
||||
GenotypeLikelihoods forwardStrandGL;
|
||||
GenotypeLikelihoods reverseStrandGL;
|
||||
GenotypeLikelihoods totalGL;
|
||||
int expectedChrs = (int)(2.0 * (double)samplesAtLocus * MAF);
|
||||
|
||||
public PerSampleGenotypeLikelihoods() {
|
||||
forwardStrandGL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, new DiploidGenotypePriors(), defaultPlatform);
|
||||
forwardStrandGL.setVerbose(VERBOSE);
|
||||
reverseStrandGL = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, new DiploidGenotypePriors(), defaultPlatform);
|
||||
reverseStrandGL.setVerbose(VERBOSE);
|
||||
}
|
||||
// TODO -- we need to use the priors from the UnifiedArgumentCollection
|
||||
return DiploidGenotypePriors.HUMAN_HETEROZYGOSITY / (double)expectedChrs;
|
||||
}
|
||||
|
||||
public void addObservedBase(char base, SAMRecord read, int offset) {
|
||||
if ( !read.getReadNegativeStrandFlag() )
|
||||
forwardStrandGL.add(base, read.getBaseQualities()[offset], read, offset);
|
||||
else
|
||||
reverseStrandGL.add(base, read.getBaseQualities()[offset], read, offset);
|
||||
}
|
||||
|
||||
public void createGLs(DiploidGenotypePriors priors) {
|
||||
setPriors(priors);
|
||||
/**
|
||||
* A class to keep track of some basic metrics about our calls
|
||||
*/
|
||||
private class CallMetrics {
|
||||
long nConfidentCalls = 0;
|
||||
long nNonConfidentCalls = 0;
|
||||
long nCalledBases = 0;
|
||||
|
||||
// GL_i = GL+_i + GL-_i
|
||||
totalGL = GenotypeLikelihoods.combineLikelihoods(forwardStrandGL, reverseStrandGL);
|
||||
totalGL.setVerbose(VERBOSE);
|
||||
if ( VERBOSE )
|
||||
totalGL.validate();
|
||||
}
|
||||
CallMetrics() {}
|
||||
|
||||
public void setPriors(DiploidGenotypePriors priors) {
|
||||
if ( forwardStrandGL != null )
|
||||
forwardStrandGL.setPriors(priors);
|
||||
if ( reverseStrandGL != null )
|
||||
reverseStrandGL.setPriors(priors);
|
||||
if ( totalGL != null )
|
||||
totalGL.setPriors(priors);
|
||||
public String toString() {
|
||||
return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
|
||||
nConfidentCalls, nNonConfidentCalls, nCalledBases);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -14,9 +14,8 @@ import java.util.*;
|
|||
public abstract class GenotypeCalculationModel implements Cloneable {
|
||||
|
||||
public enum Model {
|
||||
SINGLE_SAMPLE,
|
||||
MULTI_SAMPLE_EM,
|
||||
MULTI_SAMPLE_ALL_MAFS
|
||||
EM,
|
||||
ALL_MAFS
|
||||
}
|
||||
|
||||
protected BaseMismatchModel baseModel;
|
||||
|
|
|
|||
|
|
@ -58,14 +58,11 @@ public class GenotypeCalculationModelFactory {
|
|||
Logger logger) {
|
||||
GenotypeCalculationModel gcm;
|
||||
switch ( UAC.genotypeModel ) {
|
||||
case SINGLE_SAMPLE:
|
||||
gcm = new SingleSampleGenotypeCalculationModel();
|
||||
case EM:
|
||||
gcm = new EMGenotypeCalculationModel();
|
||||
break;
|
||||
case MULTI_SAMPLE_EM:
|
||||
gcm = new MultiSampleEMGenotypeCalculationModel();
|
||||
break;
|
||||
case MULTI_SAMPLE_ALL_MAFS:
|
||||
gcm = new MultiSampleAllMAFsGenotypeCalculationModel();
|
||||
case ALL_MAFS:
|
||||
gcm = new AllMAFsGenotypeCalculationModel();
|
||||
break;
|
||||
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,32 +78,6 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
initialize();
|
||||
}
|
||||
|
||||
public static GenotypeLikelihoods combineLikelihoods(GenotypeLikelihoods gl1, GenotypeLikelihoods gl2) {
|
||||
if ( gl1 == null )
|
||||
return gl2;
|
||||
if ( gl2 == null )
|
||||
return gl1;
|
||||
|
||||
// GL = GL_i + GL_j
|
||||
|
||||
GenotypeLikelihoods gl;
|
||||
try {
|
||||
gl = (GenotypeLikelihoods)gl1.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new StingException(e.getMessage());
|
||||
}
|
||||
assert(gl1.likelihoods.length == gl1.posteriors.length);
|
||||
assert(gl1.likelihoods.length == gl2.likelihoods.length);
|
||||
assert(gl1.posteriors.length == gl2.posteriors.length);
|
||||
|
||||
for (int i = 0; i < gl1.likelihoods.length; i++) {
|
||||
gl.likelihoods[i] += gl2.likelihoods[i];
|
||||
gl.posteriors[i] += gl2.posteriors[i];
|
||||
}
|
||||
|
||||
return gl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cloning of the object
|
||||
* @return
|
||||
|
|
|
|||
|
|
@ -1,75 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
public class SingleSampleGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||
|
||||
private CallResult callsMetrics = new CallResult();
|
||||
|
||||
protected SingleSampleGenotypeCalculationModel() {}
|
||||
|
||||
public void initialize(BaseMismatchModel baseModel,
|
||||
Set<String> samples,
|
||||
EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform platform,
|
||||
GenotypeWriter out,
|
||||
Logger logger,
|
||||
boolean genotypeMode,
|
||||
double lod,
|
||||
int maxDeletions,
|
||||
boolean verbose) {
|
||||
super.initialize(baseModel, samples, platform, out, logger, genotypeMode, lod, maxDeletions, verbose);
|
||||
|
||||
// check that this truly is a single-sample bam
|
||||
if ( samples.size() > 1 )
|
||||
throw new StingException("Single-sample genotype calculation model used on multi-sample bam... aborting");
|
||||
}
|
||||
|
||||
public boolean calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
|
||||
GenotypeLikelihoods gl = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
gl.setVerbose(VERBOSE);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
|
||||
// check that there aren't too many deletions in the pileup
|
||||
pileup.setIncludeDeletionsInPileupString(true);
|
||||
if ( Utils.countOccurrences(BasicPileup.DELETION_CHAR, pileup.getBases()) > maxDeletionsInPileup )
|
||||
return false;
|
||||
|
||||
gl.add(pileup, true);
|
||||
gl.validate();
|
||||
SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, gl, pileup);
|
||||
|
||||
callsMetrics.nCalledBases++;
|
||||
|
||||
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
|
||||
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
||||
if ( confidence >= LOD_THRESHOLD ) {
|
||||
callsMetrics.nConfidentCalls++;
|
||||
//System.out.printf("Call %s%n", call);
|
||||
out.addGenotypeCall(call);
|
||||
} else
|
||||
callsMetrics.nNonConfidentCalls++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public class CallResult {
|
||||
long nConfidentCalls = 0;
|
||||
long nNonConfidentCalls = 0;
|
||||
long nCalledBases = 0;
|
||||
|
||||
CallResult() {}
|
||||
|
||||
public String toString() {
|
||||
return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
|
||||
nConfidentCalls, nNonConfidentCalls, nCalledBases);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -40,8 +40,8 @@ import java.io.File;
|
|||
public class UnifiedArgumentCollection {
|
||||
|
||||
// control the various models to be used
|
||||
@Argument(fullName = "genotypeModel", shortName = "gm", doc = "Genotype calculation model to employ -- SINGLE_SAMPLE and MULTI_SAMPLE_EM are the recommended choices, but it's possible to select MULTI_SAMPLE_ALL_MAFs", required = true)
|
||||
public GenotypeCalculationModel.Model genotypeModel = null;
|
||||
@Argument(fullName = "genotypeModel", shortName = "gm", doc = "Genotype calculation model to employ -- EM is the default choice; the ALL_MAFS model is experimental", required = false)
|
||||
public GenotypeCalculationModel.Model genotypeModel = GenotypeCalculationModel.Model.EM;
|
||||
|
||||
@Argument(fullName = "baseModel", shortName = "bm", doc = "Base substitution model to employ -- EMPIRICAL is the recommended default, but it's possible to select the ONE_STATE and THREE_STATE models for comparison purposes", required = false)
|
||||
public BaseMismatchModel baseModel = BaseMismatchModel.EMPIRICAL;
|
||||
|
|
@ -71,7 +71,8 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "maxCoverage", shortName = "maxCoverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
|
||||
public Integer MAX_READS_IN_PILEUP = 10000;
|
||||
|
||||
|
||||
//@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false)
|
||||
//public boolean disableCache = false;
|
||||
//public boolean disableCache = false;
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,21 +5,20 @@ import org.junit.Test;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
||||
public static String baseTestString() {
|
||||
return "-T SingleSampleGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s";
|
||||
return "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s";
|
||||
}
|
||||
|
||||
public static String testGeliLod5() {
|
||||
return baseTestString() + " --variant_output_format GELI -lod 5";
|
||||
}
|
||||
|
||||
private static String OneMb1StateMD5 = "ca300adf2442d8874b4f13819547a7fb";
|
||||
private static String OneMb3StateMD5 = "45caee5f71a30c7175c7389b594369b1";
|
||||
private static String OneMbEmpiricalMD5 = "c8602b745b5c024f09938a9f87f923cf";
|
||||
private static String OneMb1StateMD5 = "cba4436066b5d88a461cc3eba74ad944";
|
||||
private static String OneMb3StateMD5 = "799dde914dd4afbffca9502b7f284780";
|
||||
private static String OneMbEmpiricalMD5 = "2b99b3c667c8ac94e8268b17e6979073";
|
||||
|
||||
// private static String oneMbMD5(BaseMismatchModel m) {
|
||||
// switch (m) {
|
||||
|
|
@ -43,14 +42,14 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMultiTechnologies() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T SingleSampleGenotyper" +
|
||||
"-T UnifiedGenotyper" +
|
||||
" -R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
|
||||
" -varout %s" +
|
||||
" -L 1:10,000,000-10,100,000" +
|
||||
" -m empirical",
|
||||
" -bm empirical",
|
||||
1,
|
||||
Arrays.asList("09f7ea954a44e68b80726147aee10944"));
|
||||
Arrays.asList("0a923f676111b9bc27ccb1a9b97eafcc"));
|
||||
|
||||
executeTest(String.format("testMultiTechnologies"), spec);
|
||||
}
|
||||
|
|
@ -60,6 +59,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
// testing the cache
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
/*
|
||||
@Test
|
||||
public void testCache() {
|
||||
for ( BaseMismatchModel model : BaseMismatchModel.values() ) {
|
||||
|
|
@ -70,11 +70,12 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
List<String> withoutCache = executeTest("empirical1MbTest", withoutCacheSpec ).getSecond();
|
||||
|
||||
WalkerTest.WalkerTestSpec withCacheSpec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m " + model.toString(), 1,
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm " + model.toString(), 1,
|
||||
withoutCache);
|
||||
executeTest(String.format("testCache[%s]", model), withCacheSpec );
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -84,8 +85,8 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void genotypeTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1,
|
||||
Arrays.asList("b4c6d84ea14f9c34b04b799d3edd199a"));
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1,
|
||||
Arrays.asList("436c0e3365f61bf1d06eb630c025e51b"));
|
||||
executeTest("genotypeTest", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,14 +98,14 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void oneState100bpTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,000,100 -m one_state", 1, Arrays.asList("3cd402d889c015be4a318123468f4262"));
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,000,100 -bm one_state", 1, Arrays.asList("3cd402d889c015be4a318123468f4262"));
|
||||
executeTest("oneState100bpTest", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void oneState1MbTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m one_state",
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm one_state",
|
||||
1, Arrays.asList(OneMb1StateMD5));
|
||||
executeTest("oneState1MbTest", spec);
|
||||
}
|
||||
|
|
@ -112,7 +113,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void threeState1MbTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m three_state", 1,
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm three_state", 1,
|
||||
Arrays.asList(OneMb3StateMD5));
|
||||
executeTest("threeState1MbTest", spec);
|
||||
}
|
||||
|
|
@ -120,7 +121,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void empirical1MbTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m empirical", 1,
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm empirical", 1,
|
||||
Arrays.asList(OneMbEmpiricalMD5));
|
||||
executeTest("empirical1MbTest", spec);
|
||||
}
|
||||
|
|
@ -144,12 +145,12 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testLOD() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 10.0, "3c6c40551c42f693736dab5e29f7a76c" );
|
||||
e.put( 3.0, "5c82a81f5919b29058f9ca031c00b7ef" );
|
||||
e.put( 10.0, "3ec7815482112e0b9b4487ec69a52b67" );
|
||||
e.put( 3.0, "20da42fdce2e1c97d9c8d4935d31125d" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseTestString() + " --variant_output_format GELI -L 1:10,000,000-11,000,000 -m EMPIRICAL -lod " + entry.getKey(), 1,
|
||||
baseTestString() + " --variant_output_format GELI -L 1:10,000,000-11,000,000 -bm EMPIRICAL -lod " + entry.getKey(), 1,
|
||||
Arrays.asList(entry.getValue()));
|
||||
executeTest("testLOD", spec);
|
||||
}
|
||||
|
|
@ -163,13 +164,13 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "c6fc697d31cfda563145138126561c77" );
|
||||
e.put( 0.0001, "1a1466b7599c60fad75d0513a0c8496e" );
|
||||
e.put( 1.0 / 1850, "5758dbcdea158fb053fcea22b06befe8" );
|
||||
|
||||
e.put( 0.01, "e7494e6a62bd91ca02537c327a104395" );
|
||||
e.put( 0.0001, "1d51c711353d0db5b54dd0d2a7899c49" );
|
||||
e.put( 1.0 / 1850, "873f558a2c07ec40635e35d275b12d69" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m EMPIRICAL --heterozygosity " + entry.getKey(), 1,
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -bm EMPIRICAL --heterozygosity " + entry.getKey(), 1,
|
||||
Arrays.asList(entry.getValue()));
|
||||
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue