General WalkerTest framework. Includes some minor changes to GATK core to enable creation of true command-line like GATK modules in the code. Extensive first-pass tests for SSG
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1538 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
471ca8201e
commit
2b0d1c52b2
|
|
@ -45,6 +45,8 @@ import java.util.Arrays;
|
|||
* @author aaron
|
||||
*/
|
||||
public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||
public Object GATKResult = null;
|
||||
|
||||
|
||||
/**
|
||||
* The actual engine which performs the analysis.
|
||||
|
|
@ -70,6 +72,11 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
* @return the return code to exit the program with
|
||||
*/
|
||||
protected int execute() {
|
||||
GATKResult = executeGATK();
|
||||
return 0;
|
||||
}
|
||||
|
||||
protected Object executeGATK() {
|
||||
GATKArgumentCollection arguments = getArgumentCollection();
|
||||
|
||||
Walker<?,?> mWalker = GATKEngine.getWalkerByName(getAnalysisName());
|
||||
|
|
@ -83,9 +90,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
|
||||
// set the analysis name in the argument collection
|
||||
arguments.analysisName = getAnalysisName();
|
||||
GATKEngine.execute(arguments, mWalker);
|
||||
|
||||
return 0;
|
||||
return GATKEngine.execute(arguments, mWalker);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
try {
|
||||
CommandLineGATK instance = new CommandLineGATK();
|
||||
start(instance, argv);
|
||||
System.exit(CommandLineProgram.result); // todo -- this is a painful hack
|
||||
} catch (Exception e) {
|
||||
exitSystemWithError(e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
if (Types.containsKey(boundName)) {
|
||||
throw new RuntimeException(String.format("GATK BUG: adding ROD module %s that is already bound", boundName));
|
||||
}
|
||||
logger.info(String.format("* Adding rod class %s%n", name));
|
||||
logger.info(String.format("* Adding rod class %s", name));
|
||||
Types.put(boundName, new RODBinding(name, rodType));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
private final static int FIXED_PLOIDY = 2;
|
||||
private final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||
|
||||
protected boolean enableCacheFlag = true;
|
||||
|
||||
//
|
||||
// The three fundamental data arrays associated with a Genotype Likelhoods object
|
||||
//
|
||||
|
|
@ -175,8 +177,12 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
*
|
||||
* @return true if caching should be enabled, false otherwise
|
||||
*/
|
||||
protected boolean enableCache() {
|
||||
return true;
|
||||
public boolean cacheIsEnabled() {
|
||||
return enableCacheFlag;
|
||||
}
|
||||
|
||||
public void setEnableCacheFlag(boolean enable) {
|
||||
enableCacheFlag = enable;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -247,7 +253,7 @@ public abstract class GenotypeLikelihoods implements Cloneable {
|
|||
|
||||
if ( qualityScore > getMinQScoreToInclude() ) {
|
||||
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
|
||||
boolean enableCache = enableCacheArg && enableCache();
|
||||
boolean enableCache = enableCacheArg && cacheIsEnabled();
|
||||
GenotypeLikelihoods cached = null;
|
||||
if ( enableCache ) {
|
||||
if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read, offset ) ) {
|
||||
|
|
|
|||
|
|
@ -72,6 +72,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
lazyEval();
|
||||
|
||||
if (other == null)
|
||||
return false;
|
||||
if (other instanceof SSGenotypeCall) {
|
||||
|
|
@ -89,11 +91,29 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f likelihoods=%s",
|
||||
getLocation(), mRefBase, mPileup.getReads().size(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
|
||||
getLocation(), mGenotype, mCompareTo, mRefBase, mPileup.getReads().size(),
|
||||
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
||||
}
|
||||
|
||||
private void lazyEval() {
|
||||
// us
|
||||
if (mGenotype == null) {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
}
|
||||
|
||||
// our comparison
|
||||
if (mCompareTo == null) {
|
||||
if (this.mBestVrsRef) {
|
||||
mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2));
|
||||
} else {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -112,10 +132,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
* get the best genotype
|
||||
*/
|
||||
public DiploidGenotype getBestGenotype() {
|
||||
if (mGenotype == null) {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
}
|
||||
lazyEval();
|
||||
return mGenotype;
|
||||
}
|
||||
|
||||
|
|
@ -123,14 +140,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
* get the alternate genotype
|
||||
*/
|
||||
public DiploidGenotype getAltGenotype() {
|
||||
if (mCompareTo == null) {
|
||||
if (this.mBestVrsRef) {
|
||||
mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2));
|
||||
} else {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
}
|
||||
}
|
||||
lazyEval();
|
||||
return mCompareTo;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,6 +44,10 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
|||
@Argument(fullName = "platform", shortName = "pl", doc = "Causes the genotyper to assume that reads without PL header TAG are this platform. Defaults to null, indicating that the system will throw a runtime exception when such reads are detected", required = false)
|
||||
public EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform = null;
|
||||
|
||||
@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false)
|
||||
public boolean disableCache = false;
|
||||
|
||||
|
||||
public class CallResult {
|
||||
long nConfidentCalls = 0;
|
||||
long nNonConfidentCalls = 0;
|
||||
|
|
@ -94,6 +98,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
|||
GenotypeLikelihoods gl = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
|
||||
gl.setVerbose(VERBOSE);
|
||||
gl.setEnableCacheFlag(! disableCache);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
gl.add(pileup, true);
|
||||
gl.validate();
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ public abstract class CommandLineProgram {
|
|||
* this is used to indicate if they've asked for help
|
||||
*/
|
||||
@Argument(fullName="help",shortName="h",doc="Generate this help message",required=false)
|
||||
public Boolean help = false;
|
||||
public Boolean help = false;
|
||||
|
||||
/**
|
||||
* our logging output patterns
|
||||
|
|
@ -139,6 +139,8 @@ public abstract class CommandLineProgram {
|
|||
BasicConfigurator.configure();
|
||||
}
|
||||
|
||||
public static int result = 0;
|
||||
|
||||
/**
|
||||
* This function is called to start processing the command line, and kick
|
||||
* off the execute message of the program.
|
||||
|
|
@ -233,10 +235,10 @@ public abstract class CommandLineProgram {
|
|||
clp.setupLoggerLevel();
|
||||
|
||||
// call the execute
|
||||
int result = clp.execute();
|
||||
CommandLineProgram.result = clp.execute();
|
||||
|
||||
// return the result
|
||||
System.exit(result);
|
||||
//System.exit(result); // todo -- is this safe -- why exit here? I want to run the GATK like normal
|
||||
}
|
||||
catch (ArgumentException e) {
|
||||
clp.parser.printHelp( clp.getApplicationDetails() );
|
||||
|
|
|
|||
|
|
@ -0,0 +1,123 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.junit.Test;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.security.MessageDigest;
|
||||
import java.math.BigInteger;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
public class WalkerTest extends BaseTest {
|
||||
public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5 ) {
|
||||
try {
|
||||
byte[] bytesOfMessage = getBytesFromFile(resultsFile);
|
||||
byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage);
|
||||
BigInteger bigInt = new BigInteger(1, thedigest);
|
||||
String filemd5sum = bigInt.toString(16);
|
||||
|
||||
if ( parameterize() || expectedMD5.equals("") ) {
|
||||
logger.warn(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b",
|
||||
name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5)));
|
||||
} else {
|
||||
logger.warn(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
|
||||
Assert.assertEquals(name + "Mismatching MD5s", expectedMD5, filemd5sum);
|
||||
logger.warn(String.format(" => %s PASSED", name));
|
||||
}
|
||||
|
||||
return filemd5sum;
|
||||
} catch ( Exception e ) {
|
||||
throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile);
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s ) {
|
||||
List<String> md5s = new ArrayList<String>();
|
||||
for ( int i = 0; i < resultFiles.size(); i++ ) {
|
||||
String md5 = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
|
||||
md5s.add(i, md5);
|
||||
}
|
||||
|
||||
return md5s;
|
||||
}
|
||||
|
||||
|
||||
public static byte[] getBytesFromFile(File file) throws IOException {
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
// Get the size of the file
|
||||
long length = file.length();
|
||||
|
||||
if (length > Integer.MAX_VALUE) {
|
||||
// File is too large
|
||||
}
|
||||
|
||||
// Create the byte array to hold the data
|
||||
byte[] bytes = new byte[(int)length];
|
||||
|
||||
// Read in the bytes
|
||||
int offset = 0;
|
||||
int numRead = 0;
|
||||
while (offset < bytes.length
|
||||
&& (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
|
||||
offset += numRead;
|
||||
}
|
||||
|
||||
// Ensure all the bytes have been read in
|
||||
if (offset < bytes.length) {
|
||||
throw new IOException("Could not completely read file "+file.getName());
|
||||
}
|
||||
|
||||
// Close the input stream and return bytes
|
||||
is.close();
|
||||
return bytes;
|
||||
}
|
||||
|
||||
public class WalkerTestSpec {
|
||||
String args = "";
|
||||
int nOutputFiles = -1;
|
||||
List<String> md5s = null;
|
||||
|
||||
public WalkerTestSpec(String args, int nOutputFiles, List<String> md5s) {
|
||||
this.args = args;
|
||||
this.nOutputFiles = nOutputFiles;
|
||||
this.md5s = md5s;
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean parameterize() {
|
||||
return false;
|
||||
}
|
||||
|
||||
protected List<String> executeTest(final String name, WalkerTestSpec spec) {
|
||||
List<File> tmpFiles = new ArrayList<File>();
|
||||
for ( int i = 0; i < spec.nOutputFiles; i++ ) {
|
||||
try {
|
||||
tmpFiles.add( File.createTempFile(String.format("walktest.tmp_param.%d", i), ".tmp" ) );
|
||||
} catch (IOException ex) {
|
||||
System.err.println("Cannot create temp file: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
final String args = String.format(spec.args, tmpFiles.toArray());
|
||||
logger.warn(Utils.dupString('-', 80));
|
||||
logger.warn(String.format("Executing test %s with GATK arguments: %s", name, args));
|
||||
CommandLineGATK instance = new CommandLineGATK();
|
||||
CommandLineExecutable.start(instance, args.split(" "));
|
||||
|
||||
return assertMatchingMD5s(name, tmpFiles, spec.md5s);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWalkerTest() {
|
||||
logger.warn("WalkerTest is just a framework");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,140 +1,164 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.genotype.Variant;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.junit.Test;
|
||||
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.executive.Accumulator;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.*;
|
||||
import java.security.MessageDigest;
|
||||
import java.math.BigInteger;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
public class SingleSampleGenotyperTest extends BaseTest {
|
||||
private final static double DELTA = 1e-8;
|
||||
|
||||
private static final String oneMB = "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
|
||||
private static final String fiveMB = "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_15_mb.SLX.bam";
|
||||
private static String gatkargs(final String bam) {
|
||||
final String GATKArgs =
|
||||
"<GATK-argument-collection>\n" +
|
||||
" <sam-files class=\"java.util.ArrayList\">\n" +
|
||||
" <file>%s</file>\n" +
|
||||
" </sam-files>\n" +
|
||||
" <reference-file>/broad/1KG/reference/human_b36_both.fasta</reference-file>\n" +
|
||||
" <analysis-name>SingleSampleGenotyper</analysis-name>\n" +
|
||||
"</GATK-argument-collection>";
|
||||
return String.format(GATKArgs, bam);
|
||||
public class SingleSampleGenotyperTest extends WalkerTest {
|
||||
public static String baseTestString() {
|
||||
return "-T SingleSampleGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s";
|
||||
}
|
||||
|
||||
private static class ExpectedResult {
|
||||
long nCalls;
|
||||
String md5sum;
|
||||
public ExpectedResult(long calls, String md5) {
|
||||
this.nCalls = calls;
|
||||
this.md5sum = md5;
|
||||
}
|
||||
public static String testGeliLod5() {
|
||||
return baseTestString() + " --variant_output_format GELI -lod 5";
|
||||
}
|
||||
|
||||
public static EnumMap<BaseMismatchModel, ExpectedResult> expectedOutput = new EnumMap<BaseMismatchModel, ExpectedResult>(BaseMismatchModel.class);
|
||||
private static String OneMb1StateMD5 = "d5404668e76f206055f03d97162ea6d9";
|
||||
private static String OneMb3StateMD5 = "46fb7b66da3dac341e9c342f751d74cd";
|
||||
private static String OneMbEmpiricalMD5 = "ea0be2fd074a6c824a0670ad5b3e0aca";
|
||||
|
||||
static {
|
||||
expectedOutput.put(BaseMismatchModel.ONE_STATE, new ExpectedResult(983L, "d5404668e76f206055f03d97162ea6d9"));
|
||||
expectedOutput.put(BaseMismatchModel.THREE_STATE, new ExpectedResult(1055L, "46fb7b66da3dac341e9c342f751d74cd"));
|
||||
expectedOutput.put(BaseMismatchModel.EMPIRICAL, new ExpectedResult(1070L, "ea0be2fd074a6c824a0670ad5b3e0aca"));
|
||||
}
|
||||
|
||||
//private static double callingTolerance = 0.2; // I'm willing to tolerate +/- 10% variation in call rate from that expected
|
||||
|
||||
public static long nExpectedCalls(BaseMismatchModel model) {
|
||||
return expectedOutput.get(model).nCalls;
|
||||
}
|
||||
|
||||
// public static double nExpectedCallsTolerance(long nBasesCalled) {
|
||||
// return nExpectedCalls(nBasesCalled) * callingTolerance;
|
||||
// private static String oneMbMD5(BaseMismatchModel m) {
|
||||
// switch (m) {
|
||||
// case ONE_STATE: return OneMb1StateMD5;
|
||||
// case THREE_STATE: return OneMb3StateMD5;
|
||||
// case EMPIRICAL: return OneMbEmpiricalMD5;
|
||||
// default: throw new RuntimeException("Unexpected BaseMismatchModel " + m);
|
||||
// }
|
||||
// }
|
||||
|
||||
public static void assertGoodNumberOfCalls(final String name, BaseMismatchModel model, SingleSampleGenotyper.CallResult calls) {
|
||||
Assert.assertEquals(name, nExpectedCalls(model), calls.nConfidentCalls);
|
||||
}
|
||||
// Uncomment to not check outputs against expectations
|
||||
//protected boolean parameterize() {
|
||||
// return true;
|
||||
//}
|
||||
|
||||
public static void assertMatchingMD5(final String name, BaseMismatchModel model, final File resultsFile ) {
|
||||
try {
|
||||
byte[] bytesOfMessage = getBytesFromFile(resultsFile);
|
||||
byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage);
|
||||
BigInteger bigInt = new BigInteger(1, thedigest);
|
||||
String filemd5sum = bigInt.toString(16);
|
||||
Assert.assertEquals(name + "Mismatching MD5s", expectedOutput.get(model).md5sum, filemd5sum);
|
||||
} catch ( Exception e ) {
|
||||
throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile);
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing the cache
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testCache() {
|
||||
for ( BaseMismatchModel model : BaseMismatchModel.values() ) {
|
||||
// calculated the expected value without the cache enabled
|
||||
WalkerTest.WalkerTestSpec withoutCacheSpec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 --disableCache -m " + model.toString(), 1,
|
||||
Arrays.asList(""));
|
||||
List<String> withoutCache = executeTest("empirical1MbTest", withoutCacheSpec );
|
||||
|
||||
WalkerTest.WalkerTestSpec withCacheSpec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m " + model.toString(), 1,
|
||||
withoutCache);
|
||||
executeTest(String.format("testCache[%s]", model), withCacheSpec );
|
||||
}
|
||||
}
|
||||
|
||||
public static byte[] getBytesFromFile(File file) throws IOException {
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
// Get the size of the file
|
||||
long length = file.length();
|
||||
|
||||
if (length > Integer.MAX_VALUE) {
|
||||
// File is too large
|
||||
}
|
||||
|
||||
// Create the byte array to hold the data
|
||||
byte[] bytes = new byte[(int)length];
|
||||
|
||||
// Read in the bytes
|
||||
int offset = 0;
|
||||
int numRead = 0;
|
||||
while (offset < bytes.length
|
||||
&& (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
|
||||
offset += numRead;
|
||||
}
|
||||
|
||||
// Ensure all the bytes have been read in
|
||||
if (offset < bytes.length) {
|
||||
throw new IOException("Could not completely read file "+file.getName());
|
||||
}
|
||||
|
||||
// Close the input stream and return bytes
|
||||
is.close();
|
||||
return bytes;
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing genotype mode
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void genotypeTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1,
|
||||
Arrays.asList("7e5dec6481bbbc890493925da9a8f691"));
|
||||
executeTest("genotypeTest", spec);
|
||||
}
|
||||
|
||||
private void oneMBCallTest(BaseMismatchModel model) {
|
||||
logger.warn("Executing oneMBCallTest for " + model);
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// basic base calling models
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
InputStream stream = new ByteArrayInputStream(gatkargs(oneMB).getBytes());
|
||||
GATKArgumentCollection mycoll = GATKArgumentCollection.unmarshal(stream);
|
||||
mycoll.intervals = Arrays.asList("1:10,000,000-11,000,000");
|
||||
|
||||
SingleSampleGenotyper SSG = new SingleSampleGenotyper();
|
||||
SSG.VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||
SSG.LOD_THRESHOLD = 5.0;
|
||||
|
||||
try {
|
||||
SSG.VARIANTS_FILE = File.createTempFile("SSGTempTmpCalls.geli.calls", ".tmp" );
|
||||
} catch (IOException ex) {
|
||||
System.err.println("Cannot create temp file: " + ex.getMessage());
|
||||
}
|
||||
SSG.baseModel = model;
|
||||
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
System.out.printf("model is %s, SSG says %s%n", model, SSG.baseModel );
|
||||
Accumulator obj = (Accumulator)engine.execute(mycoll, SSG);
|
||||
SingleSampleGenotyper.CallResult calls = (SingleSampleGenotyper.CallResult)obj.finishTraversal();
|
||||
logger.warn(String.format("SSG(%s): made %d calls at %d bases", SSG.baseModel, calls.nConfidentCalls, calls.nCalledBases));
|
||||
assertMatchingMD5("testBasic", model, SSG.VARIANTS_FILE);
|
||||
assertGoodNumberOfCalls("testBasic", model, calls);
|
||||
@Test
|
||||
public void oneState100bpTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,000,100 -m one_state", 1, Arrays.asList("3cd402d889c015be4a318123468f4262"));
|
||||
executeTest("oneState100bpTest", spec);
|
||||
}
|
||||
|
||||
@Test public void oneStateOneMBTest() { oneMBCallTest(BaseMismatchModel.ONE_STATE); }
|
||||
@Test public void threeStateOneMBTest() { oneMBCallTest(BaseMismatchModel.THREE_STATE); }
|
||||
@Test public void empiricalStateOneMBTest() { oneMBCallTest(BaseMismatchModel.EMPIRICAL); }
|
||||
@Test
|
||||
public void oneState1MbTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m one_state",
|
||||
1, Arrays.asList(OneMb1StateMD5));
|
||||
executeTest("oneState1MbTest", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void threeState1MbTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m three_state", 1,
|
||||
Arrays.asList(OneMb3StateMD5));
|
||||
executeTest("threeState1MbTest", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void empirical1MbTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m empirical", 1,
|
||||
Arrays.asList(OneMbEmpiricalMD5));
|
||||
executeTest("empirical1MbTest", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing output formats
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
//@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File format to be used", required = false)
|
||||
//public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing LOD thresholding
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testLOD() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 10.0, "e4c51dca6f1fa999f4399b7412829534" );
|
||||
e.put( 3.0, "d804c24d49669235e3660e92e664ba1a" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseTestString() + " --variant_output_format GELI -L 1:10,000,000-11,000,000 -m EMPIRICAL -lod " + entry.getKey(), 1,
|
||||
Arrays.asList(entry.getValue()));
|
||||
executeTest("testLOD", spec);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing hetero setting
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "ca9986b32aac0d6ad6058f4bf10e7df2" );
|
||||
e.put( 0.0001, "55d4e3e73215b70b22a8e689a4e16d37" );
|
||||
e.put( 1.0 / 1850, "1ae2126f1a6490d6edd15d95bce726c4" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-11,000,000 -m EMPIRICAL --heterozygosity " + entry.getKey(), 1,
|
||||
Arrays.asList(entry.getValue()));
|
||||
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue