From d6e2e89f9994f45182071962f3d140a0569dc865 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 18 Jul 2011 10:46:01 -0400 Subject: [PATCH] Walker test system refactoring. All MD5DB related functions are now in MD5DB.java. System has the concept of a local and a global MD5 db. The local one is like it operated previously. The global one lives in /humgen/gsa-hpprojects/GATK/data/integrationtests. If the system can find this directory then MD5s will also be read / written to this location. This means that gsabamboo will print differences as appropriate. And all users will in effect have access to a complete history of MD5 file results. A few minor code reshuffles changed VariantRecalibration and VCFHeader test files. --- .../org/broadinstitute/sting/BaseTest.java | 206 +-------------- .../test/org/broadinstitute/sting/MD5DB.java | 247 ++++++++++++++++++ .../org/broadinstitute/sting/WalkerTest.java | 4 +- ...ntRecalibrationWalkersIntegrationTest.java | 5 +- .../CombineVariantsIntegrationTest.java | 3 +- .../utils/genotype/vcf/VCFHeaderUnitTest.java | 55 +++- 6 files changed, 306 insertions(+), 214 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/MD5DB.java diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index b3e422ba9..ef46d4bff 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -13,10 +13,7 @@ import java.io.*; import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * @@ -83,11 +80,6 @@ public abstract class BaseTest { public static final String networkTempDir = "/broad/shptmp/"; public static final File networkTempDirFile = new File(networkTempDir); - /** - * Subdirectory under the ant build directory where we store integration test md5 results - */ - public static final String MD5_FILE_DB_SUBDIR = "integrationtests"; - public static final String testDir = "public/testdata/"; /** before the class starts up */ @@ -129,7 +121,7 @@ public abstract class BaseTest { * 2: Create instances of your subclass. Return from it the call to getTests, providing * the class type of your test * - * @DataProvider(name = "summaries") + * @DataProvider(name = "summaries" * public Object[][] createSummaries() { * new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2"); * new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1"); @@ -206,200 +198,6 @@ public abstract class BaseTest { } } - /** - * a little utility function for all tests to md5sum a file - * Shameless taken from: - * - * http://www.javalobby.org/java/forums/t84420.html - * - * @param file the file - * @return a string - */ - public static String md5SumFile(File file) { - MessageDigest digest; - try { - digest = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new ReviewedStingException("Unable to find MD5 digest"); - } - InputStream is; - try { - is = new FileInputStream(file); - } catch (FileNotFoundException e) { - throw new ReviewedStingException("Unable to open file " + file); - } - byte[] buffer = new byte[8192]; - int read; - try { - while ((read = is.read(buffer)) > 0) { - digest.update(buffer, 0, read); - } - byte[] md5sum = digest.digest(); - BigInteger bigInt = new BigInteger(1, md5sum); - return bigInt.toString(16); - - } - catch (IOException e) { - throw new ReviewedStingException("Unable to process file for MD5", e); - } - finally { - try { - is.close(); - } - catch (IOException e) { - throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); - } - } - } - - protected static void ensureMd5DbDirectory() { - // todo -- make path - File dir = new File(MD5_FILE_DB_SUBDIR); - if ( ! dir.exists() ) { - System.out.printf("##### Creating MD5 db %s%n", MD5_FILE_DB_SUBDIR); - if ( ! dir.mkdir() ) { - throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + MD5_FILE_DB_SUBDIR); - } - } - } - - protected static File getFileForMD5(final String md5) { - final String basename = String.format("%s.integrationtest", md5); - return new File(MD5_FILE_DB_SUBDIR + "/" + basename); - } - - private static void updateMD5Db(final String md5, final File resultsFile) { - // todo -- copy results file to DB dir if needed under filename for md5 - final File dbFile = getFileForMD5(md5); - if ( ! dbFile.exists() ) { - // the file isn't already in the db, copy it over - System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); - try { - FileUtils.copyFile(resultsFile, dbFile); - } catch ( IOException e ) { - throw new ReviewedStingException(e.getMessage()); - } - } else { - System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); - - } - } - - private static String getMD5Path(final String md5, final String valueIfNotFound) { - // todo -- look up the result in the directory and return the path if it exists - final File dbFile = getFileForMD5(md5); - return dbFile.exists() ? dbFile.getPath() : valueIfNotFound; - } - - public static byte[] getBytesFromFile(File file) throws IOException { - InputStream is = new FileInputStream(file); - - // Get the size of the file - long length = file.length(); - - if (length > Integer.MAX_VALUE) { - // File is too large - } - - // Create the byte array to hold the data - byte[] bytes = new byte[(int) length]; - - // Read in the bytes - int offset = 0; - int numRead = 0; - while (offset < bytes.length - && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { - offset += numRead; - } - - // Ensure all the bytes have been read in - if (offset < bytes.length) { - throw new IOException("Could not completely read file " + file.getName()); - } - - // Close the input stream and return bytes - is.close(); - return bytes; - } - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. - * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); - - if (parameterize || expectedMD5.equals("")) { - // Don't assert - } else if ( filemd5sum.equals(expectedMD5) ) { - System.out.println(String.format(" => %s PASSED", name)); - } else { - Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum)); - } - - - - return filemd5sum; - } - - - /** - * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. - * @param name Name of the test. - * @param resultsFile File to MD5. - * @param expectedMD5 Expected MD5 value. - * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. - * @return The calculated MD5. - */ - public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { - try { - byte[] bytesOfMessage = getBytesFromFile(resultsFile); - byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); - BigInteger bigInt = new BigInteger(1, thedigest); - String filemd5sum = bigInt.toString(16); - while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 - - // - // copy md5 to integrationtests - // - updateMD5Db(filemd5sum, resultsFile); - - if (parameterize || expectedMD5.equals("")) { - System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", - name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); - } else { - System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); - System.out.flush(); - - if ( ! expectedMD5.equals(filemd5sum) ) { - // we are going to fail for real in assertEquals (so we are counted by the testing framework). - // prepare ourselves for the comparison - System.out.printf("##### Test %s is going fail #####%n", name); - String pathToExpectedMD5File = getMD5Path(expectedMD5, "[No DB file found]"); - String pathToFileMD5File = getMD5Path(filemd5sum, "[No DB file found]"); - System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); - System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); - System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); - - // inline differences - DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0); - boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params); - if ( success ) - System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", - pathToExpectedMD5File, pathToFileMD5File); - } - } - - return filemd5sum; - } catch (Exception e) { - throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); - } - } - /** * Creates a temp file that will be deleted on exit after tests are complete. * @param name Prefix of the file. diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java new file mode 100644 index 000000000..bea9eaec5 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.testng.Assert; + +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/18/11 + * Time: 9:10 AM + * + * Utilities for manipulating the MD5 database of previous results + */ +public class MD5DB { + /** + * Subdirectory under the ant build directory where we store integration test md5 results + */ + public static final String LOCAL_MD5_DB_DIR = "integrationtests"; + public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests"; + + // ---------------------------------------------------------------------- + // + // MD5 DB stuff + // + // ---------------------------------------------------------------------- + + /** + * Create the MD5 file directories if necessary + */ + protected static void ensureMd5DbDirectory() { + File dir = new File(LOCAL_MD5_DB_DIR); + if ( ! dir.exists() ) { + System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR); + if ( ! dir.mkdir() ) { + throw new ReviewedStingException("Infrastructure failure: failed to create md5 directory " + LOCAL_MD5_DB_DIR); + } + } + } + + /** + * Returns the path to an already existing file with the md5 contents, or valueIfNotFound + * if no such file exists in the db. + * + * @param md5 + * @param valueIfNotFound + * @return + */ + public static String getMD5FilePath(final String md5, final String valueIfNotFound) { + // we prefer the local db to the global DB, so match it first + for ( String dir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR)) { + File f = getFileForMD5(md5, dir); + if ( f.exists() && f.canRead() ) + return f.getPath(); + } + + return valueIfNotFound; + } + + /** + * Utility function that given a file's md5 value and the path to the md5 db, + * returns the canonical name of the file. For example, if md5 is XXX and db is YYY, + * this will return YYY/XXX.integrationtest + * + * @param md5 + * @param dbPath + * @return + */ + private static File getFileForMD5(final String md5, final String dbPath) { + final String basename = String.format("%s.integrationtest", md5); + return new File(dbPath + "/" + basename); + } + + /** + * Copies the results file with md5 value to its canonical file name and db places + * + * @param md5 + * @param resultsFile + */ + private static void updateMD5Db(final String md5, final File resultsFile) { + copyFileToDB(getFileForMD5(md5, LOCAL_MD5_DB_DIR), resultsFile); + copyFileToDB(getFileForMD5(md5, GLOBAL_MD5_DB_DIR), resultsFile); + } + + /** + * Low-level utility routine that copies resultsFile to dbFile + * @param dbFile + * @param resultsFile + */ + private static void copyFileToDB(File dbFile, final File resultsFile) { + if ( ! dbFile.exists() ) { + // the file isn't already in the db, copy it over + System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath()); + try { + FileUtils.copyFile(resultsFile, dbFile); + } catch ( IOException e ) { + System.out.printf("##### Skipping update, cannot write file %s%n", dbFile); + } + } else { + System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath()); + } + } + + /** + * Returns the byte[] of the entire contents of file, for md5 calculations + * @param file + * @return + * @throws IOException + */ + private static byte[] getBytesFromFile(File file) throws IOException { + InputStream is = new FileInputStream(file); + + // Get the size of the file + long length = file.length(); + + if (length > Integer.MAX_VALUE) { + // File is too large + } + + // Create the byte array to hold the data + byte[] bytes = new byte[(int) length]; + + // Read in the bytes + int offset = 0; + int numRead = 0; + while (offset < bytes.length + && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { + offset += numRead; + } + + // Ensure all the bytes have been read in + if (offset < bytes.length) { + throw new IOException("Could not completely read file " + file.getName()); + } + + // Close the input stream and return bytes + is.close(); + return bytes; + } + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. + */ + public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize); + + if (parameterize || expectedMD5.equals("")) { + // Don't assert + } else if ( filemd5sum.equals(expectedMD5) ) { + System.out.println(String.format(" => %s PASSED", name)); + } else { + Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum)); + } + + return filemd5sum; + } + + + /** + * Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL NOT throw an exception if the MD5s are different. + * @param name Name of the test. + * @param resultsFile File to MD5. + * @param expectedMD5 Expected MD5 value. + * @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text. + * @return The calculated MD5. + */ + public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { + try { + byte[] bytesOfMessage = getBytesFromFile(resultsFile); + byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); + BigInteger bigInt = new BigInteger(1, thedigest); + String filemd5sum = bigInt.toString(16); + while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 + + // + // copy md5 to integrationtests + // + updateMD5Db(filemd5sum, resultsFile); + + if (parameterize || expectedMD5.equals("")) { + System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b", + name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5))); + } else { + System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5)); + System.out.flush(); + + if ( ! expectedMD5.equals(filemd5sum) ) { + // we are going to fail for real in assertEquals (so we are counted by the testing framework). + // prepare ourselves for the comparison + System.out.printf("##### Test %s is going fail #####%n", name); + String pathToExpectedMD5File = getMD5FilePath(expectedMD5, "[No DB file found]"); + String pathToFileMD5File = getMD5FilePath(filemd5sum, "[No DB file found]"); + System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); + System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); + System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); + + // inline differences + DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0); + boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params); + if ( success ) + System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", + pathToExpectedMD5File, pathToFileMD5File); + } + } + + return filemd5sum; + } catch (Exception e) { + throw new RuntimeException("Failed to read bytes from calls file: " + resultsFile, e); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index 22635dfa3..386c17659 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -53,7 +53,7 @@ public class WalkerTest extends BaseTest { } public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) { - return assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); + return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize()); } public void maybeValidateSupplementaryFile(final String name, final File resultFile) { @@ -191,7 +191,7 @@ public class WalkerTest extends BaseTest { } protected Pair, List> executeTest(final String name, WalkerTestSpec spec) { - ensureMd5DbDirectory(); // ensure the md5 directory exists + MD5DB.ensureMd5DbDirectory(); // ensure the md5 directory exists List tmpFiles = new ArrayList(); for (int i = 0; i < spec.nOutputFiles; i++) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 2fec2e70f..057053a1c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import org.broadinstitute.sting.MD5DB; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; @@ -65,8 +66,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -NO_HEADER" + " -B:input,VCF " + params.inVCF + " -o %s" + - " -tranchesFile " + getFileForMD5(params.tranchesMD5) + - " -recalFile " + getFileForMD5(params.recalMD5), + " -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null), Arrays.asList(params.cutVCFMD5)); executeTest("testApplyRecalibration-"+params.inVCF, spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index fb18c6c33..00ee44f75 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -70,7 +70,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } @@ -119,7 +118,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void complexTestFull() { combineComplexSites("", "64b991fd3850f83614518f7d71f0532f"); } + @Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); } @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); } @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java index c4ca6a551..14e63191d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java @@ -2,15 +2,16 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringBufferInputStream; +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; /** * Created by IntelliJ IDEA. @@ -40,6 +41,52 @@ public class VCFHeaderUnitTest extends BaseTest { checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088"); } + /** + * a little utility function for all tests to md5sum a file + * Shameless taken from: + * + * http://www.javalobby.org/java/forums/t84420.html + * + * @param file the file + * @return a string + */ + private static String md5SumFile(File file) { + MessageDigest digest; + try { + digest = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new ReviewedStingException("Unable to find MD5 digest"); + } + InputStream is; + try { + is = new FileInputStream(file); + } catch (FileNotFoundException e) { + throw new ReviewedStingException("Unable to open file " + file); + } + byte[] buffer = new byte[8192]; + int read; + try { + while ((read = is.read(buffer)) > 0) { + digest.update(buffer, 0, read); + } + byte[] md5sum = digest.digest(); + BigInteger bigInt = new BigInteger(1, md5sum); + return bigInt.toString(16); + + } + catch (IOException e) { + throw new ReviewedStingException("Unable to process file for MD5", e); + } + finally { + try { + is.close(); + } + catch (IOException e) { + throw new ReviewedStingException("Unable to close input stream for MD5 calculation", e); + } + } + } + private void checkMD5ofHeaderFile(VCFHeader header, String md5sum) { File myTempFile = null; PrintWriter pw = null;