gatk-3.8/java/test/org/broadinstitute/sting/BaseTest.java

366 lines
15 KiB
Java
Executable File

package org.broadinstitute.sting;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.*;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
*
* User: aaron
* Date: Apr 14, 2009
* Time: 10:24:30 AM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 14, 2009
* <p/>
* Class BaseTest
* <p/>
* This is the base test class for all of our test cases. All test cases should extend from this
* class; it sets up the logger, and resolves the location of directories that we rely on.
*/
@SuppressWarnings("unchecked")
public abstract class BaseTest {
/** our log, which we want to capture anything from org.broadinstitute.sting */
public static final Logger logger = CommandLineUtils.getStingLogger();
// Absolute paths to the reference FASTA files. The static initializer of this class
// refuses to load unless the hg18, hg19 and b36 references exist on disk.
public static final String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta";
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
// Root of the shared GATK data directory, followed by the subdirectories derived from it.
// Every location ends with a trailing slash so file names can be appended directly.
public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
public static final String validationDataLocation = GATKDataLocation + "Validation_Data/";
public static final String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/";
public static final String comparisonDataLocation = GATKDataLocation + "Comparisons/";
public static final String annotationDataLocation = GATKDataLocation + "Annotations/";
// refGene tables stored under the refseq annotation directory, one per reference listed above.
public static final String refseqAnnotationLocation = annotationDataLocation + "refseq/";
public static final String hg18Refseq = refseqAnnotationLocation + "refGene-big-table-hg18.txt";
public static final String hg19Refseq = refseqAnnotationLocation + "refGene-big-table-hg19.txt";
public static final String b36Refseq = refseqAnnotationLocation + "refGene-big-table-b36.txt";
public static final String b37Refseq = refseqAnnotationLocation + "refGene-big-table-b37.txt";
// dbSNP files; these live directly in the GATK data root rather than in a subdirectory.
public static final String dbsnpDataLocation = GATKDataLocation;
public static final String hg18dbSNP129 = dbsnpDataLocation + "dbsnp_129_hg18.rod";
public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.rod";
public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_129_b37.rod";
public static final String b37dbSNP132 = dbsnpDataLocation + "dbsnp_132_b37.vcf";
// Interval lists; these also live directly in the GATK data root.
public static final String intervalsLocation = GATKDataLocation;
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
// Directory in which createNetworkTempFile() creates its files.
public static final String networkTempDir = "/broad/shptmp/";
public static final File networkTempDirFile = new File(networkTempDir);
/**
* Subdirectory under the ant build directory where we store integration test md5 results
*/
public static final String MD5_FILE_DB_SUBDIR = "integrationtests";
// Relative path of the test data directory.
// NOTE(review): presumably resolved against the working directory of the test run -- confirm.
public static final String testDir = "testdata/";
/**
 * One-time setup executed when the class is loaded: configures console logging for the
 * sting logger and verifies that the reference files the tests rely on can be found.
 */
static {
    // start from the basic console configuration
    CommandLineUtils.configureConsoleLogging();

    // tag every line written through the sting logger as test output
    final PatternLayout testLayout = new PatternLayout();
    testLayout.setConversionPattern("TEST %C{1}.%M - %d{HH:mm:ss,SSS} - %m%n");
    CommandLineUtils.setLayout(logger, testLayout);

    // keep the output quiet: the sting logger reports warnings and above only
    logger.setLevel(Level.WARN);

    // nothing can run without the reference files, so fail immediately if one is missing
    final boolean referencesAvailable =
            fileExist(hg18Reference) && fileExist(hg19Reference) && fileExist(b36KGReference);
    if (!referencesAvailable) {
        logger.fatal("We can't locate the reference directories. Aborting!");
        throw new RuntimeException("BaseTest setup failed: unable to locate the reference directories");
    }
}
/**
 * Checks whether the given path exists on disk.
 *
 * @param file path to check, as a string
 * @return true if a file or directory exists at that path
 */
public static boolean fileExist(String file) {
    return new File(file).exists();
}
/**
 * This appender looks for a specific message in the log4j stream.
 * It can be used to verify that a specific message was generated to the logging system.
 * <p/>
 * The comparison is an exact {@code equals} between the logged message object and the
 * target string; once a match has been seen, {@link #foundString()} stays true.
 */
public static class ValidationAppender extends AppenderSkeleton {
    /** set to true as soon as a matching message has been appended */
    private boolean foundString = false;
    /** the exact message we are waiting for */
    private final String targetString;

    /**
     * @param target the exact message to look for
     */
    public ValidationAppender(String target) {
        targetString = target;
    }

    /**
     * Records a match if the message of the event equals the target string.
     *
     * @param loggingEvent the event handed to us by log4j
     */
    @Override
    protected void append(LoggingEvent loggingEvent) {
        // a null message is legal in log4j; it simply cannot match, and must not crash the logging caller
        final Object message = loggingEvent.getMessage();
        if (message != null && message.equals(targetString))
            foundString = true;
    }

    /** Nothing to release: this appender holds no resources. */
    public void close() {
        // do nothing
    }

    /** @return false, since events are inspected directly and never formatted */
    public boolean requiresLayout() {
        return false;
    }

    /** @return true if the target message has been logged since this appender was created */
    public boolean foundString() {
        return foundString;
    }
}
/**
 * A little utility function for all tests to md5sum a file.
 * Shamelessly taken from:
 *
 * http://www.javalobby.org/java/forums/t84420.html
 *
 * The file is streamed through the digest in 8 KiB chunks, so it is never held in memory
 * as a whole. The result is always 32 lowercase hex characters, left-padded with zeros,
 * which is the same form testFileMD5 produces.
 *
 * @param file the file to digest
 * @return the MD5 of the file contents as a 32-character hex string
 * @throws ReviewedStingException if MD5 is unavailable, or the file cannot be opened, read or closed
 */
public static String md5SumFile(File file) {
    MessageDigest digest;
    try {
        digest = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
        throw new ReviewedStingException("Unable to find MD5 digest", e);
    }
    InputStream is;
    try {
        is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
        throw new ReviewedStingException("Unable to open file " + file, e);
    }
    byte[] buffer = new byte[8192];
    int read;
    try {
        // read() signals end of stream with -1
        while ((read = is.read(buffer)) != -1) {
            digest.update(buffer, 0, read);
        }
        // BigInteger drops leading zeros, so restore them to get a full-width digest
        StringBuilder hex = new StringBuilder(new BigInteger(1, digest.digest()).toString(16));
        while (hex.length() < 32) {
            hex.insert(0, '0');
        }
        return hex.toString();
    }
    catch (IOException e) {
        throw new ReviewedStingException("Unable to process file for MD5", e);
    }
    finally {
        try {
            is.close();
        }
        catch (IOException e) {
            throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e);
        }
    }
}
/**
 * Makes sure the MD5 database directory (MD5_FILE_DB_SUBDIR) exists, creating it,
 * including any missing parent directories, when it does not.
 *
 * @throws ReviewedStingException if the directory cannot be created, for instance because
 *         a regular file already occupies the path
 */
protected static void ensureMd5DbDirectory() {
    final File dir = new File(MD5_FILE_DB_SUBDIR);
    if ( dir.isDirectory() ) {
        return;
    }
    System.out.printf("##### Creating MD5 db %s%n", MD5_FILE_DB_SUBDIR);
    // mkdirs() also returns false when somebody else created the directory in the meantime,
    // so only treat it as a failure if the directory is still missing afterwards
    if ( ! dir.mkdirs() && ! dir.isDirectory() ) {
        throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + MD5_FILE_DB_SUBDIR);
    }
}
/**
 * Maps an MD5 to the file that holds the corresponding results in the MD5 database.
 *
 * @param md5 the md5 of the results
 * @return the file {@code <md5>.integrationtest} inside MD5_FILE_DB_SUBDIR; it may not exist
 */
protected static File getFileForMD5(final String md5) {
    final String dbFileName = md5 + ".integrationtest";
    final String dbFilePath = MD5_FILE_DB_SUBDIR + "/" + dbFileName;
    return new File(dbFilePath);
}
/**
 * Stores a copy of the results file in the MD5 database under the name derived from its
 * md5, unless a file for that md5 is already there.
 *
 * @param md5 the md5 of resultsFile
 * @param resultsFile the file to copy into the database
 * @throws ReviewedStingException if the copy fails; the underlying IOException is kept as the cause
 */
private static void updateMD5Db(final String md5, final File resultsFile) {
    final File dbFile = getFileForMD5(md5);
    if ( dbFile.exists() ) {
        System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
        return;
    }

    // the file isn't already in the db, copy it over
    System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath());
    try {
        FileUtils.copyFile(resultsFile, dbFile);
    } catch ( IOException e ) {
        // keep the original exception attached so the real I/O failure shows up in the stack trace
        throw new ReviewedStingException(e.getMessage(), e);
    }
}
/**
 * Looks up the database file for an md5.
 *
 * @param md5 the md5 to look up
 * @param valueIfNotFound what to return when the database holds no file for this md5
 * @return the path of the database file if it exists, otherwise valueIfNotFound
 */
private static String getMD5Path(final String md5, final String valueIfNotFound) {
    final File candidate = getFileForMD5(md5);
    if ( ! candidate.exists() ) {
        return valueIfNotFound;
    }
    return candidate.getPath();
}
/**
 * Reads the complete contents of a file into memory.
 *
 * @param file the file to read
 * @return the bytes of the file, in order
 * @throws IOException if the file cannot be opened, is larger than Integer.MAX_VALUE bytes,
 *         or ends before the number of bytes reported by File.length() has been read
 */
public static byte[] getBytesFromFile(File file) throws IOException {
    InputStream is = new FileInputStream(file);
    try {
        // Get the size of the file
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            // a Java array cannot hold it, and the int cast below would silently corrupt the length
            throw new IOException("File is too large to read into memory: " + file.getName());
        }
        // Create the byte array to hold the data
        byte[] bytes = new byte[(int) length];
        // Read in the bytes; read() may return fewer than requested, so loop until full or EOF
        int offset = 0;
        int numRead = 0;
        while (offset < bytes.length
                && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
            offset += numRead;
        }
        // Ensure all the bytes have been read in
        if (offset < bytes.length) {
            throw new IOException("Could not completely read file " + file.getName());
        }
        return bytes;
    } finally {
        // always release the file handle, including when one of the checks above throws
        is.close();
    }
}
/**
 * Computes the MD5 of a file and asserts that it equals the expected value.
 * NOTE: unlike testFileMD5, this function WILL fail the test (via a TestNG assertion)
 * when the MD5s differ, unless the check is running in parameterization mode.
 *
 * @param name Name of the test.
 * @param resultsFile File to MD5.
 * @param expectedMD5 Expected MD5 value.
 * @param parameterize If true or if expectedMD5 is an empty string, the calculated MD5 is only printed and nothing is asserted.
 * @return The calculated MD5.
 */
public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
    final String actualMD5 = testFileMD5(name, resultsFile, expectedMD5, parameterize);

    // in parameterization mode the md5 has already been reported; there is nothing to assert
    final boolean reportOnly = parameterize || expectedMD5.equals("");
    if (reportOnly) {
        return actualMD5;
    }

    Assert.assertEquals(actualMD5, expectedMD5, name + " Mismatching MD5s");
    System.out.println(String.format(" => %s PASSED", name));
    return actualMD5;
}
/**
 * Computes the MD5 of a file, stores a copy of the file in the MD5 database, and prints how
 * the MD5 compares with the expected value.
 * NOTE: This function WILL NOT throw an exception if the MD5s are different; on a mismatch it
 * only prints where the expected and calculated files can be found, plus a diff command.
 *
 * @param name Name of the test.
 * @param resultsFile File to MD5.
 * @param expectedMD5 Expected MD5 value.
 * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
 * @return The calculated MD5, as 32 hex characters.
 */
public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
    try {
        final byte[] contents = getBytesFromFile(resultsFile);
        final byte[] rawDigest = MessageDigest.getInstance("MD5").digest(contents);

        // render as hex; BigInteger drops leading zeros, so pad back to length 32
        final StringBuilder hex = new StringBuilder(new BigInteger(1, rawDigest).toString(16));
        while (hex.length() < 32) {
            hex.insert(0, "0");
        }
        final String filemd5sum = hex.toString();

        // copy md5 to integrationtests
        updateMD5Db(filemd5sum, resultsFile);

        final boolean reportOnly = parameterize || expectedMD5.equals("");
        if (reportOnly) {
            System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b",
                    name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5)));
            return filemd5sum;
        }

        System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
        System.out.flush();
        if ( expectedMD5.equals(filemd5sum) ) {
            return filemd5sum;
        }

        // the caller is expected to fail for real in assertEquals (so the testing framework
        // counts it); here we only print what is needed to compare the two files
        System.out.printf("##### Test %s is going fail #####%n", name);
        final String expectedPath = getMD5Path(expectedMD5, "[No DB file found]");
        final String calculatedPath = getMD5Path(filemd5sum, "[No DB file found]");
        System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, expectedPath);
        System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, calculatedPath);
        System.out.printf("##### Diff command: diff %s %s%n", expectedPath, calculatedPath);
        // todo -- add support for simple inline display of the first N differences for text file
        return filemd5sum;
    } catch (Exception e) {
        throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e);
    }
}
/**
 * Creates a file in the default temporary directory and registers it for deletion when the
 * JVM exits, i.e. after the tests are complete.
 *
 * @param name Prefix of the file.
 * @param extension Extension to concat to the end of the file.
 * @return A file in the temporary directory starting with name, ending with extension, which will be deleted after the program exits.
 * @throws ReviewedStingException if the file cannot be created
 */
public static File createTempFile(String name, String extension) {
    final File tempFile;
    try {
        tempFile = File.createTempFile(name, extension);
    } catch (IOException ex) {
        throw new ReviewedStingException("Cannot create temp file: " + ex.getMessage(), ex);
    }
    tempFile.deleteOnExit();
    return tempFile;
}
/**
 * Creates a file in the network temporary directory (networkTempDirFile) and registers it
 * for deletion when the JVM exits, i.e. after the tests are complete.
 *
 * @param name Prefix of the file.
 * @param extension Extension to concat to the end of the file.
 * @return A file in the network temporary directory starting with name, ending with extension, which will be deleted after the program exits.
 * @throws ReviewedStingException if the file cannot be created
 */
public static File createNetworkTempFile(String name, String extension) {
    final File networkFile;
    try {
        networkFile = File.createTempFile(name, extension, networkTempDirFile);
    } catch (IOException ex) {
        throw new ReviewedStingException("Cannot create temp file: " + ex.getMessage(), ex);
    }
    networkFile.deleteOnExit();
    return networkFile;
}
}