MD5DB for integration test management now writes out an md5mismatches file for clean analysis

-- This file is in integrationtests/md5mismatches.txt, and looks like:

expected        observed        test
7fd0d0c2d1af3b16378339c181e40611        2339d841d3c3c7233ebba9a6ace895fd        test BeagleOutputToVCF
43865f3f0d975ee2c5912b31393842f8        1b9c4734274edd3142a05033e520beac        testBeagleChangesSitesToRef
daead9bfab1a5df72c5e3a239366118e        27be14f9fc951c4e714b4540b045c2df        testDiffObjects:master=/local/dev/depristo/itest/public/testdata/diffTestMaster.vcf,test=/local/dev/depristo/itest/public/testdata/diffTestTest.vcf,md5=daead9bfab1a5df72c5e3a239366118e

-- Associated cleanup with making md5db an instantiated object, rather than a bunch of static methods
This commit is contained in:
Mark DePristo 2012-06-06 14:02:01 -04:00
parent aef4c5eec1
commit 982192e2e4
4 changed files with 68 additions and 20 deletions

View File

@ -25,14 +25,13 @@
package org.broadinstitute.sting;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
/**
@ -44,6 +43,8 @@ import java.util.Arrays;
* Utilities for manipulating the MD5 database of previous results
*/
public class MD5DB {
public static final Logger logger = Logger.getLogger(MD5DB.class);
/**
* Subdirectory under the ant build directory where we store integration test md5 results
*/
@ -52,6 +53,36 @@ public class MD5DB {
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
// tracking and emitting a data file of original and new md5s
// File that receives the tab-separated expected/observed/test mismatch table.
private final File MD5MismatchesFile;
// Stream onto MD5MismatchesFile; opened by the constructor and released by close().
private final PrintStream md5MismatchStream;
/**
 * Creates an MD5DB whose mismatch table is written to md5mismatches.txt
 * under the local md5 db directory (LOCAL_MD5_DB_DIR).
 */
public MD5DB() {
    this(new File(LOCAL_MD5_DB_DIR + "/md5mismatches.txt"));
}
public MD5DB(final File MD5MismatchesFile) {
this.MD5MismatchesFile = MD5MismatchesFile;
ensureMd5DbDirectory();
logger.debug("Creating md5 mismatch db at " + MD5MismatchesFile);
try {
md5MismatchStream = new PrintStream(new FileOutputStream(MD5MismatchesFile));
md5MismatchStream.printf("%s\t%s\t%s%n", "expected", "observed", "test");
} catch ( FileNotFoundException e ) {
throw new ReviewedStingException("Failed to open md5 mismatch file", e);
}
}
/**
 * Closes the md5 mismatch stream, logging the file it was writing to.
 * Does nothing when there is no stream.
 */
public void close() {
    if ( md5MismatchStream == null ) {
        return;
    }

    logger.debug("Closeing md5 mismatch db at " + MD5MismatchesFile);
    md5MismatchStream.close();
}
// ----------------------------------------------------------------------
//
// MD5 DB stuff
@ -61,7 +92,7 @@ public class MD5DB {
/**
* Create the MD5 file directories if necessary
*/
protected static void ensureMd5DbDirectory() {
private void ensureMd5DbDirectory() {
File dir = new File(LOCAL_MD5_DB_DIR);
if ( ! dir.exists() ) {
System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR);
@ -79,7 +110,7 @@ public class MD5DB {
* @param valueIfNotFound
* @return
*/
public static String getMD5FilePath(final String md5, final String valueIfNotFound) {
public String getMD5FilePath(final String md5, final String valueIfNotFound) {
// we prefer the global db to the local DB, so match it first
for ( String dir : Arrays.asList(GLOBAL_MD5_DB_DIR, LOCAL_MD5_DB_DIR)) {
File f = getFileForMD5(md5, dir);
@ -99,7 +130,7 @@ public class MD5DB {
* @param dbPath
* @return
*/
private static File getFileForMD5(final String md5, final String dbPath) {
private File getFileForMD5(final String md5, final String dbPath) {
    // db entries are stored as <dbPath>/<md5>.integrationtest; existence is not checked here
    return new File(dbPath + "/" + String.format("%s.integrationtest", md5));
}
@ -110,7 +141,7 @@ public class MD5DB {
* @param md5
* @param resultsFile
*/
private static void updateMD5Db(final String md5, final File resultsFile) {
private void updateMD5Db(final String md5, final File resultsFile) {
    // offer the results file to the local db first, then to the global one
    for ( final String dbDir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR) ) {
        copyFileToDB(getFileForMD5(md5, dbDir), resultsFile);
    }
}
@ -120,7 +151,7 @@ public class MD5DB {
* @param dbFile
* @param resultsFile
*/
private static void copyFileToDB(File dbFile, final File resultsFile) {
private void copyFileToDB(File dbFile, final File resultsFile) {
if ( ! dbFile.exists() ) {
// the file isn't already in the db, copy it over
System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath());
@ -192,7 +223,7 @@ public class MD5DB {
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
* @return The calculated MD5.
*/
public static MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
public MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
final String actualMD5 = testFileMD5(name, resultsFile, expectedMD5, parameterize);
String failMessage = null;
boolean failed = false;
@ -218,7 +249,7 @@ public class MD5DB {
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
* @return The calculated MD5.
*/
public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
public String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
try {
byte[] bytesOfMessage = getBytesFromFile(resultsFile);
byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage);
@ -247,8 +278,10 @@ public class MD5DB {
BaseTest.log(String.format("calculated %s", filemd5sum));
BaseTest.log(String.format("diff %s %s", pathToExpectedMD5File, pathToFileMD5File));
md5MismatchStream.printf("%s\t%s\t%s%n", expectedMD5, filemd5sum, name);
md5MismatchStream.flush();
// inline differences
// TODO -- capture output and put in log
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
final PrintStream ps = new PrintStream(baos);
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(ps, 20, 10, 0, MAX_RAW_DIFFS_TO_SUMMARIZE);

View File

@ -40,10 +40,13 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
import java.io.*;
import org.testng.Assert;
import org.testng.annotations.AfterSuite;
import org.testng.annotations.BeforeMethod;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.*;
@ -52,13 +55,26 @@ public class WalkerTest extends BaseTest {
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
private static final boolean ENABLE_ON_THE_FLY_CHECK_FOR_VCF_INDEX = false;
// MD5DB instance held statically by this class; handed out by getMd5DB() and closed in finalizeWalkerTests().
private static MD5DB md5DB = new MD5DB();
@BeforeMethod
public void initializeRandomGenerator() {
// Logs that walker tests are being initialized, then calls
// GenomeAnalysisEngine.resetRandomGenerator().
public void initializeWalkerTests() {
logger.debug("Initializing walker tests");
GenomeAnalysisEngine.resetRandomGenerator();
}
/**
 * Suite-level teardown (TestNG {@code @AfterSuite}): logs that the walker tests
 * are finishing and closes the MD5DB held in the static md5DB field.
 */
@AfterSuite
public void finalizeWalkerTests() {
logger.debug("Finalizing walker tests");
md5DB.close();
}
/**
 * @return the MD5DB instance stored in this class's static md5DB field
 */
public static MD5DB getMd5DB() {
return md5DB;
}
/**
 * Checks a results file against its expected md5 by delegating to MD5DB's
 * assertMatchingMD5, passing parameterize() as the parameterize flag.
 *
 * @param name        name of the test, forwarded unchanged
 * @param resultsFile file whose md5 is checked
 * @param expectedMD5 md5 the results file is expected to have
 * @return the MD5Match returned by the MD5DB call
 */
public MD5DB.MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
return getMd5DB().assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
}
public void validateOutputBCFIfPossible(final String name, final File resultFile) {
@ -257,8 +273,6 @@ public class WalkerTest extends BaseTest {
}
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec) {
MD5DB.ensureMd5DbDirectory(); // ensure the md5 directory exists
List<File> tmpFiles = new ArrayList<File>();
for (int i = 0; i < spec.nOutputFiles; i++) {
String ext = spec.exts == null ? ".tmp" : "." + spec.exts.get(i);

View File

@ -67,8 +67,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" --no_cmdline_in_header" +
" -input " + params.inVCF +
" -o %s" +
" -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
Arrays.asList(params.cutVCFMD5));
executeTest("testApplyRecalibration-"+params.inVCF, spec);
}
@ -115,8 +115,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" --no_cmdline_in_header" +
" -input " + params.inVCF +
" -o %s" +
" -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
Arrays.asList(params.cutVCFMD5));
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
}

View File

@ -42,6 +42,7 @@ import org.broadinstitute.sting.queue.engine.CommandLinePluginManager
object PipelineTest extends BaseTest with Logging {
private val validationReportsDataLocation = "/humgen/gsa-hpprojects/GATK/validationreports/submitted/"
private val md5DB = new MD5DB()
final val run = System.getProperty("pipeline.run") == "run"
@ -111,7 +112,7 @@ object PipelineTest extends BaseTest with Logging {
private def assertMatchingMD5s(name: String, fileMD5s: Traversable[(File, String)], parameterize: Boolean) {
var failed = 0
for ((file, expectedMD5) <- fileMD5s) {
val calculatedMD5 = MD5DB.testFileMD5(name, file, expectedMD5, parameterize)
val calculatedMD5 = md5DB.testFileMD5(name, file, expectedMD5, parameterize)
if (!parameterize && expectedMD5 != "" && expectedMD5 != calculatedMD5)
failed += 1
}