Add HaplotypeCaller argument so integration tests can specify the hardware dependent PairHMM sub-implementation

This commit is contained in:
Ron Levine 2014-11-14 11:30:47 -05:00
parent ba5a32107f
commit 386aeda022
16 changed files with 232 additions and 120 deletions

View File

@ -106,9 +106,9 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
if ( !(walker instanceof UnifiedGenotyper) ) {
if ( !walkerIdentityCheckWarningLogged ) {
if ( walker != null )
logger.warn("Must be called from UnifiedGenotyper, not " + walker.getClass().getName());
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper, not " + walker.getClass().getName());
else
logger.warn("Must be called from UnifiedGenotyper");
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper");
walkerIdentityCheckWarningLogged = true;
}
return null;

View File

@ -557,6 +557,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false)
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING;
@Hidden
@Advanced
@Argument(fullName = "pair_hmm_sub_implementation", shortName = "pairHMMSub", doc = "The PairHMM machine dependent sub-implementation to use for genotype likelihood calculations. Usage is intended for test suite use only, normal usage you should rely on the architecture auto-detection", required = false)
public PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub = PairHMM.HMM_SUB_IMPLEMENTATION.ENABLE_ALL;
@Hidden
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
protected String keepRG = null;
@ -887,7 +892,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
private ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine() {
switch (likelihoodEngineImplementation) {
case PairHMM:
return new PairHMMLikelihoodCalculationEngine( (byte)gcpHMM, pairHMM, log10GlobalReadMismappingRate, noFpga, pcrErrorModel );
return new PairHMMLikelihoodCalculationEngine( (byte)gcpHMM, pairHMM, pairHMMSub, log10GlobalReadMismappingRate, noFpga, pcrErrorModel );
case GraphBased:
return new GraphBasedLikelihoodCalculationEngine( (byte)gcpHMM,log10GlobalReadMismappingRate, heterogeneousKmerSizeResolution,SCAC.DEBUG,debugGraphTransformations);
case Random:

View File

@ -83,6 +83,7 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
private final double log10globalReadMismappingRate;
private final PairHMM.HMM_IMPLEMENTATION hmmType;
private final PairHMM.HMM_SUB_IMPLEMENTATION hmmSubType;
private final boolean noFpga;
private final ThreadLocal<PairHMM> pairHMMThreadLocal = new ThreadLocal<PairHMM>() {
@ -99,15 +100,15 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
case VECTOR_LOGLESS_CACHING:
try
{
return new VectorLoglessPairHMM();
return new VectorLoglessPairHMM(hmmSubType);
}
catch(UnsatisfiedLinkError ule)
{
logger.debug("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
logger.warn("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
return new LoglessPairHMM();
}
case DEBUG_VECTOR_LOGLESS_CACHING:
return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING);
return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING, hmmSubType);
case ARRAY_LOGLESS:
if (noFpga || !CnyPairHMM.isAvailable())
return new ArrayLoglessPairHMM();
@ -148,6 +149,7 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
*
* @param constantGCP the gap continuation penalty to use with the PairHMM
* @param hmmType the type of the HMM to use
* @param hmmSubType the type of the machine dependent sub-implementation of HMM to use
* @param log10globalReadMismappingRate the global mismapping probability, in log10(prob) units. A value of
* -3 means that the chance that a read doesn't actually belong at this
* location in the genome is 1 in 1000. The effect of this parameter is
@ -157,9 +159,12 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
* reference haplotype gets a score of -100 from the pairhmm it will be
* assigned a likelihood of -13.
* @param noFpga disable FPGA acceleration
* @param pcrErrorModel model to correct for PCR indel artifacts
*/
public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) {
public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final PairHMM.HMM_IMPLEMENTATION hmmType, final PairHMM.HMM_SUB_IMPLEMENTATION hmmSubType,
final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) {
this.hmmType = hmmType;
this.hmmSubType = hmmSubType;
this.constantGCP = constantGCP;
this.log10globalReadMismappingRate = log10globalReadMismappingRate;
this.noFpga = noFpga;

View File

@ -92,14 +92,14 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
protected HashMap<String, BufferedWriter> filenameToWriter = new HashMap<String, BufferedWriter>();
private JNILoglessPairHMM jniPairHMM = null;
public DebugJNILoglessPairHMM(final PairHMM.HMM_IMPLEMENTATION hmmType) {
public DebugJNILoglessPairHMM(final PairHMM.HMM_IMPLEMENTATION hmmType, PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub) {
super();
switch(hmmType) {
case VECTOR_LOGLESS_CACHING:
jniPairHMM = new VectorLoglessPairHMM();
jniPairHMM = new VectorLoglessPairHMM(pairHMMSub);
break;
default:
throw new UserException.BadArgumentValue("pairHMM","Specified JNIPairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are VECTOR_LOGLESS_CACHING");
throw new UserException.BadArgumentValue("pairHMM","Specified JNIPairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are VECTOR_LOGLESS_CACHING");
}
}

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.utils.pairhmm;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
@ -69,12 +70,6 @@ import java.util.Map;
*/
public class VectorLoglessPairHMM extends JNILoglessPairHMM {
//For machine capabilities
public static final long sse41Mask = 1;
public static final long sse42Mask = 2;
public static final long avxMask = 4;
public static final long enableAll = 0xFFFFFFFFFFFFFFFFl;
//Used to copy references to byteArrays to JNI from reads
protected class JNIReadDataHolderClass {
public byte[] readBases = null;
@ -97,25 +92,32 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
* Bit 2 represents AVX availability
*/
public native long jniGetMachineType();
/**
* Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM.
* C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not
* change per JVM session
* @param readDataHolderClass class type of JNIReadDataHolderClass
*
* @param readDataHolderClass class type of JNIReadDataHolderClass
* @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
* @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask
* */
* @param mask a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing bits in the mask
*/
private native void jniInitializeClassFieldsAndMachineMask(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass, long mask);
private static Boolean isVectorLoglessPairHMMLibraryLoaded = false;
//The constructor is called only once inside PairHMMLikelihoodCalculationEngine
public VectorLoglessPairHMM() {
public VectorLoglessPairHMM(PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub) throws UserException.HardwareFeatureException {
super();
synchronized(isVectorLoglessPairHMMLibraryLoaded) {
synchronized (isVectorLoglessPairHMMLibraryLoaded) {
// Get the mask for the requested hardware sub-implementation
// If a specifically requested hardware feature can not be supported, throw an exception
long mask = pairHMMSub.getMask();
throwIfHardwareFeatureNotSupported(mask, pairHMMSub);
//Load the library and initialize the FieldIDs
if(!isVectorLoglessPairHMMLibraryLoaded) {
if (!isVectorLoglessPairHMMLibraryLoaded) {
try
{
//Try loading from Java's library path first
@ -123,51 +125,53 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
//implementation without modifying the Java code
System.loadLibrary("VectorLoglessPairHMM");
logger.info("libVectorLoglessPairHMM found in JVM library path");
}
catch(UnsatisfiedLinkError ule)
{
} catch (UnsatisfiedLinkError ule) {
//Could not load from Java's library path - try unpacking from jar
try
{
logger.debug("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from GATK jar file");
loadLibraryFromJar("/org/broadinstitute/gatk/utils/pairhmm/libVectorLoglessPairHMM.so");
logger.info("libVectorLoglessPairHMM unpacked successfully from GATK jar file");
}
catch(IOException ioe)
{
} catch (IOException ioe) {
//Throw the UnsatisfiedLinkError to make it clear to the user what failed
throw ule;
}
}
logger.info("Using vectorized implementation of PairHMM");
isVectorLoglessPairHMMLibraryLoaded = true;
jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once
//need to do this only once
jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, mask);
}
}
}
private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
//Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize
//Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype
private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<>();
private HashMap<Haplotype, Integer> haplotypeToHaplotypeListIdxMap = new HashMap<>();
private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray;
@Override
public HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; }
public HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap() {
return haplotypeToHaplotypeListIdxMap;
}
//Used to transfer data to JNI
//Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
/**
* {@inheritDoc}
*/
@Override
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList,
final int readMaxLength, final int haplotypeMaxLength ) {
public void initialize(final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList,
final int readMaxLength, final int haplotypeMaxLength) {
int numHaplotypes = haplotypes.size();
mHaplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
int idx = 0;
haplotypeToHaplotypeListIdxMap.clear();
for(final Haplotype currHaplotype : haplotypes)
{
for (final Haplotype currHaplotype : haplotypes) {
mHaplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
mHaplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases();
haplotypeToHaplotypeListIdxMap.put(currHaplotype, idx);
@ -175,6 +179,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
}
jniInitializeHaplotypes(numHaplotypes, mHaplotypeDataArray);
}
/**
* Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
* accessing Java memory directly, still important to release memory from C++
@ -194,22 +199,22 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
* Real compute kernel
*/
private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray,
JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);
JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);
/**
* {@inheritDoc}
*/
@Override
public void computeLikelihoods( final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord,byte[]> gcp ) {
public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods, final List<GATKSAMRecord> processedReads, final Map<GATKSAMRecord, byte[]> gcp) {
if (processedReads.isEmpty())
return;
if(doProfiling)
if (doProfiling)
startTime = System.nanoTime();
int readListSize = processedReads.size();
int numHaplotypes = likelihoods.alleleCount();
JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
int idx = 0;
for(GATKSAMRecord read : processedReads)
{
for (GATKSAMRecord read : processedReads) {
readDataArray[idx] = new JNIReadDataHolderClass();
readDataArray[idx].readBases = read.getReadBases();
readDataArray[idx].readQuals = read.getBaseQualities();
@ -219,8 +224,8 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
++idx;
}
mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results
if(doProfiling)
mLikelihoodArray = new double[readListSize * numHaplotypes]; //to store results
if (doProfiling)
threadLocalSetupTimeDiff = (System.nanoTime() - startTime);
//for(reads)
// for(haplotypes)
@ -228,21 +233,19 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12);
int readIdx = 0;
for(int r = 0; r < readListSize; r++)
{
for (int r = 0; r < readListSize; r++) {
int hapIdx = 0;
for (final Haplotype haplotype : likelihoods.alleles()) {
//Since the order of haplotypes in the List<Haplotype> and alleleHaplotypeMap is different,
//get idx of current haplotype in the list and use this idx to get the right likelihoodValue
final int idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(haplotype);
likelihoods.set(hapIdx,r,mLikelihoodArray[readIdx + idxInsideHaplotypeList]);
likelihoods.set(hapIdx, r, mLikelihoodArray[readIdx + idxInsideHaplotypeList]);
++hapIdx;
}
readIdx += numHaplotypes;
}
if(doProfiling)
{
if (doProfiling) {
threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
//synchronized(doProfiling)
{
@ -251,29 +254,30 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
}
}
}
/**
* Print final profiling information from native code
*/
public native void jniClose();
@Override
public void close()
{
if(doProfiling)
System.out.println("Time spent in setup for JNI call : "+(pairHMMSetupTime*1e-9));
public void close() {
if (doProfiling)
System.out.println("Time spent in setup for JNI call : " + (pairHMMSetupTime * 1e-9));
super.close();
jniClose();
}
//Copied from http://frommyplayground.com/how-to-load-native-jni-library-from-jar
/**
* Loads library from current JAR archive
*
* <p/>
* The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting.
* Method uses String as filename because the pathname is "abstract", not system-dependent.
*
*
* @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext
* @throws IOException If temporary file creation or read/write operation fails
* @throws IOException If temporary file creation or read/write operation fails
* @throws IllegalArgumentException If source file (param path) does not exist
* @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@see File#createTempFile(java.lang.String, java.lang.String)}).
*/
@ -293,7 +297,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
if (filename != null) {
parts = filename.split("\\.", 2);
prefix = parts[0];
suffix = (parts.length > 1) ? "."+parts[parts.length - 1] : null; // Thanks, davs! :-)
suffix = (parts.length > 1) ? "." + parts[parts.length - 1] : null; // Thanks, davs! :-)
}
// Check if the filename is okay
@ -335,4 +339,36 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
// Finally, load the library
System.load(temp.getAbsolutePath());
}
/**
* If the machine does not support the requested hardware feature, throw an exception
* <p/>
* If requesting a specific hardware feature, check if the machine supports this feature.
* If it does not, throw an exception.
*
* @param mask a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask.
* @param pairHMMSub the PairHMM machine dependent sub-implementation to use for genotype likelihood calculations
* @throws UserException.HardwareFeatureException if the hardware feature is not supported
*/
private void throwIfHardwareFeatureNotSupported(long mask, PairHMM.HMM_SUB_IMPLEMENTATION pairHMMSub) throws UserException.HardwareFeatureException
{
if ( pairHMMSub.getIsSpecificHardwareRequest() ) {
if ( !isHardwareFeatureSupported(mask) )
throw new UserException.HardwareFeatureException("Machine does not support pairHMM hardware dependent sub-type = " + pairHMMSub);
}
}
/**
* Check if the machine supports the requested hardware feature
* <p/>
* Mask the bits for the hardware feature and check if they are set by the machine
* If the bits are set, the machine supports this feature
*
* @param mask a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask.
* @return true of machine supports the requested hardware feature, false otherwise
*/
private boolean isHardwareFeatureSupported(long mask)
{
return (mask & jniGetMachineType()) != 0x0;
}
}

View File

@ -126,7 +126,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
public void timeLoglessPairHMM(final int reps) {
for (int i = 0; i < reps; i++) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, PairHMM.HMM_SUB_IMPLEMENTATION.ENABLE_ALL,-3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}

View File

@ -61,8 +61,10 @@ import static org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCal
public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest {
final static String HMM_SUB_IMPLEMENTATION = "UNVECTORIZED";
private void HCTestComplexVariants(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
}
@ -73,7 +75,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
}
@ -85,7 +87,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
}
private void HCTestComplexGGA(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexGGA: args=" + args, spec);
}
@ -103,7 +105,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
}
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -consensus -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -alleles " + validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, bam) + " --no_cmdline_in_header -o %s -consensus -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -alleles " + validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexConsensusMode: args=" + args, spec);
}

View File

@ -63,6 +63,8 @@ import java.util.List;
public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final static String HMM_SUB_IMPLEMENTATION = "UNVECTORIZED";
@DataProvider(name = "MyDataProviderHaploid")
public Object[][] makeMyDataProviderHaploid() {
List<Object[]> tests = new ArrayList<>();
@ -128,8 +130,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
*/
@Test(dataProvider = "MyDataProvider")
public void testHCWithGVCF(String bam, ReferenceConfidenceMode mode, String intervals, String md5) {
final String commandLine = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String name = "testHCWithGVCF bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
executeTest(name, spec);
@ -140,8 +142,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
*/
@Test(dataProvider = "MyDataProviderHaploid", enabled=true)
public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
final String commandLine = String.format("-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String name = "testHCWithGVCFHaploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
executeTest(name, spec);
@ -152,8 +154,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
*/
@Test(dataProvider = "MyDataProviderTetraploid", enabled=true)
public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
final String commandLine = String.format("-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String name = "testHCWithGVCFTetraploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
executeTest(name, spec);
@ -161,8 +163,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
@Test
public void testERCRegionWithNoCalledHaplotypes() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
spec.disableShadowBCF();
executeTest("testERCRegionWithNoCalledHaplotypes", spec);
@ -170,8 +172,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
@Test()
public void testMissingGVCFIndexException() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF",
b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001");
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF",
HMM_SUB_IMPLEMENTATION, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 1, UserException.GVCFIndexException.class);
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
@ -179,8 +181,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
@Test()
public void testWrongParameterGVCFIndexException() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER + 1);
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER + 1);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 1, UserException.GVCFIndexException.class);
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
@ -193,8 +195,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
if (HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE == GATKVCFIndexType.DYNAMIC_SEEK)
type = GATKVCFIndexType.DYNAMIC_SIZE;
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", type, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, privateTestDir + "noCallRefModel.bam", "20:17000001-18000001", type, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 1, UserException.GVCFIndexException.class);
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
@ -205,8 +207,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
@Test()
public void testWrongGVCFNonVariantRecordOrderBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e6a4e571abb59b925d59a38d244f0abe"));
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
@ -222,8 +224,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
@Test
public void testNoCallGVCFMissingPLsBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7ef1f30d92178f75e5220b16508b47cd"));
spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
@ -234,8 +236,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
*/
@Test(enabled=true)
public void testGeneralPloidyArrayIndexBug1Fix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189",
b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189",
HMM_SUB_IMPLEMENTATION, b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
spec.disableShadowBCF();
executeTest(" testGeneralPloidyArrayIndexBug1Fix", spec);
@ -246,8 +248,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
*/
@Test(enabled=true)
public void testGeneralPloidyArrayIndexBug2Fix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937",
b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937",
HMM_SUB_IMPLEMENTATION, b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
spec.disableShadowBCF();
executeTest(" testGeneralPloidyArrayIndexBug2Fix", spec);

View File

@ -61,6 +61,7 @@ import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -81,9 +82,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam";
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
final static String GGA_INTERVALS_FILE = privateTestDir + "haplotype-caller-reduced-test-interval.list";
final static String HMM_SUB_IMPLEMENTATION = "UNVECTORIZED";
private void HCTest(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE --maxReadsInRegionPerSample 1000 --minReadsPerAlignmentStart 5 --maxProbPropagationDistance 50 --activeProbabilityThreshold 0.002 -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE --maxReadsInRegionPerSample 1000 --minReadsPerAlignmentStart 5 --maxProbPropagationDistance 50 --activeProbabilityThreshold 0.002 -pairHMMSub %s -R %s -I %s -L %s", HMM_SUB_IMPLEMENTATION, REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCaller: args=" + args, spec);
}
@ -174,7 +176,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerIndelQualityScores: args=" + args, spec);
}
@ -189,7 +191,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference));
final GenomeLocParser parser = new GenomeLocParser(fasta.getSequenceDictionary());
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
for( final File vcf : executeTest("testHaplotypeCallerNearbySmallIntervals: args=" + args, spec).getFirst() ) {
if( containsDuplicateRecord(vcf, parser) ) {
@ -227,14 +229,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
// any of the calls in that region because it is so messy.
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cf6fbb3636c52cd47dd14e0bd415a320"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a145c3c0e99b278054a9960923da8aaa"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -242,14 +244,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestDoesNotFailOnBadRefBase() {
// don't care about the output - just want to make sure it doesn't fail
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o /dev/null -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2 -stand_emit_conf 2";
final String base = String.format("-T HaplotypeCaller --disableDithering -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o /dev/null -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2 -stand_emit_conf 2";
final WalkerTestSpec spec = new WalkerTestSpec(base, Collections.<String>emptyList());
executeTest("HCTestDoesNotFailOnBadRefBase: ", spec);
}
@Test
public void HCTestDanglingTailMergingForDeletions() throws IOException {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800 --allowNonUniqueKmersInRef";
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800 --allowNonUniqueKmersInRef";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);
@ -273,7 +275,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testLeftAlignmentBamOutBugFix() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
final String base = String.format("-T HaplotypeCaller -pairHMMSub %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
+ " --no_cmdline_in_header -bamout %s -o /dev/null -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("c19f0e62f90794661f5927c360d50998"));
executeTest("LeftAlignmentBamOutBugFix", spec);
@ -289,7 +291,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("f0618b66e088b2d6fd831bb357de933c"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -297,7 +299,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestDBSNPAnnotationWEx() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,100,000-11,000,000 -D " + b37dbSNP132
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,100,000-11,000,000 -D " + b37dbSNP132
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1,
Arrays.asList("b279dfa0864495d0cbe45167c13d5a75"));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
@ -306,7 +308,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("1b15b696671a9da000d5ec0372f939a2"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -314,7 +316,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestDBSNPAnnotationWExGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1,
Arrays.asList("e46d99fb778e2aff09ccbbc1bda2a1bf"));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
@ -323,7 +325,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestGraphBasedPCRFreePositiveLogLkFix() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + hg19Reference + " --no_cmdline_in_header -I " + NA12878_PCRFREE250_ADAPTER_TRIMMED + " -o %s -L 20:10,024,000-10,024,500 "
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + hg19Reference + " --no_cmdline_in_header -I " + NA12878_PCRFREE250_ADAPTER_TRIMMED + " -o %s -L 20:10,024,000-10,024,500 "
, 1,
Arrays.asList(""));
executeTest("HCTestGraphBasedPCRFreePositiveLogLkFix", spec);
@ -338,7 +340,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestAggressivePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1,
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1,
Arrays.asList("163a42e1144be1dc905233a8a42b72f6"));
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
}
@ -346,16 +348,16 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestConservativePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1,
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,270,000-10,300,000", 1,
Arrays.asList("13298981348351e79a2ff5407f206c1d"));
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
}
@Test
public void testNoSuchEdgeBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -dontTrimActiveRegions -ERC GVCF " +
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s -R %s -I %s -L %s -dontTrimActiveRegions -ERC GVCF " +
"-likelihoodEngine GraphBased -variant_index_type %s -variant_index_parameter %d",
b37KGReferenceWithDecoy, privateTestDir + "graphbased_no_such_edge_bug.bam", privateTestDir + "graphbased_no_such_edge_bug.intervals.bed",
HMM_SUB_IMPLEMENTATION, b37KGReferenceWithDecoy, privateTestDir + "graphbased_no_such_edge_bug.bam", privateTestDir + "graphbased_no_such_edge_bug.intervals.bed",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
spec.disableShadowBCF();
@ -365,8 +367,8 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
// This test takes longer than 15 secs ... ~ 25-35 ,
@Test
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
HMM_SUB_IMPLEMENTATION, b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fa83f82ba63729edd8696e82bfeea49"));
spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
@ -374,8 +376,8 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testMissingKeyAlternativeHaplotypesBugFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s -R %s -I %s -L %s --no_cmdline_in_header ",
HMM_SUB_IMPLEMENTATION, b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("423b70c151a5d0028e104aa3372b8783"));
spec.disableShadowBCF();
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
@ -388,9 +390,9 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final String TEST_BAM = privateTestDir + "sw_epsilon_test.bam";
final String REFERENCE = b37KGReference;
final String DBSNP = b37dbSNP138;
final String commandLineWithoutInterval = String.format("-T HaplotypeCaller -I %s -R %s -D %s "
final String commandLineWithoutInterval = String.format("-T HaplotypeCaller -pairHMMSub %s -I %s -R %s -D %s "
+ "-variant_index_type LINEAR -variant_index_parameter 128000 --no_cmdline_in_header "
+ "-stand_call_conf 10.0 -stand_emit_conf 10.0",TEST_BAM,REFERENCE,DBSNP);
+ "-stand_call_conf 10.0 -stand_emit_conf 10.0",HMM_SUB_IMPLEMENTATION,TEST_BAM,REFERENCE,DBSNP);
final String commandLineShortInterval = commandLineWithoutInterval + " -L " + SHORT_INTERVAL;
final String commandLineLongInterval = commandLineWithoutInterval + " -L " + LONG_INTERVAL;
@ -406,4 +408,18 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
}
@Test
public void testHaplotypeCallerPairHMMException(){
executeTest("HaplotypeCallerPairHMMException",
new WalkerTest.WalkerTestSpec(
" -T HaplotypeCaller" +
" --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE --maxReadsInRegionPerSample 1000 " +
" --minReadsPerAlignmentStart 5 --maxProbPropagationDistance 50 --activeProbabilityThreshold 0.002 " +
" --no_cmdline_in_header -minPruning 3 -pairHMM VECTOR_LOGLESS_CACHING -pairHMMSub TEST_BEYOND_CAPABILITIES" +
" -R " + REF +
" -I " + NA12878_BAM +
" -L " + INTERVALS_FILE +
" -o %s",
1, UserException.HardwareFeatureException.class));
}
}

View File

@ -70,6 +70,8 @@ public class HaplotypeCallerModesIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
final static String HMM_SUB_IMPLEMENTATION = "UNVECTORIZED";
@Test
public void HCTestBamWriterCalledHaplotypes() {
HCTestBamWriter(HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES, ""); // current MD5 = 9a2b6157f14b44b872a77f4e75c56023
@ -82,7 +84,7 @@ public class HaplotypeCallerModesIntegrationTest extends WalkerTest {
public void HCTestBamWriter(final HaplotypeBAMWriter.Type type, final String md5) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam -o /dev/null " +
"-T HaplotypeCaller -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam -o /dev/null " +
"-bamout %s -L 20:10,000,000-10,010,000 -bamWriterType " + type, 1,
Arrays.asList(md5));
executeTest("HC writing bams with mode " + type, spec);

View File

@ -60,6 +60,9 @@ import java.util.Arrays;
import java.util.List;
public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
final static String HMM_SUB_IMPLEMENTATION = "UNVECTORIZED";
@DataProvider(name = "NCTDataProvider")
public Object[][] makeNCTDataProvider() {
List<Object[]> tests = new ArrayList<>();
@ -74,7 +77,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
@Test(dataProvider = "NCTDataProvider")
public void testHCNCT(final int nct, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T HaplotypeCaller --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I "
"-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub " + HMM_SUB_IMPLEMENTATION + " -R " + b37KGReference + " --no_cmdline_in_header -I "
+ privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam -o %s " +
" -L 20:10,000,000-10,100,000 -G none -A none -contamination 0.0 -nct " + nct, 1,
Arrays.asList(md5));

View File

@ -138,7 +138,7 @@ public class PairHMMLikelihoodCalculationEngineUnitTest extends BaseTest {
public void createPcrErrorModelTest(final String repeat, final int repeatLength) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte)0,
PairHMM.HMM_IMPLEMENTATION.ORIGINAL, 0.0, true,
PairHMM.HMM_IMPLEMENTATION.ORIGINAL, PairHMM.HMM_SUB_IMPLEMENTATION.ENABLE_ALL, 0.0, true,
PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.CONSERVATIVE);
final String readString = Utils.dupString(repeat, repeatLength);

View File

@ -95,7 +95,8 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
//final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10, false,
// PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3);
final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, Double.NEGATIVE_INFINITY,true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, PairHMM.HMM_SUB_IMPLEMENTATION.ENABLE_ALL, Double.NEGATIVE_INFINITY,
true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
// When using likelihoods it should be around 0.05 since
// When using maximum-likelihoods it can be as low as 0.00001

View File

@ -69,7 +69,7 @@ public class Sandbox {
* change per JVM session
* @param readDataHolderClass class type of JNIReadDataHolderClass
* @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
* @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask
* @param mask 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing bits in the mask
* */
private native void jniInitializeClassFieldsAndMachineMask(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass, long mask);

View File

@ -482,4 +482,13 @@ public class UserException extends ReviewedGATKException {
super(s);
}
}
/**
* A trivial specialization of UserException to mark that a hardware feature is not supported
*/
public static class HardwareFeatureException extends UserException {
public HardwareFeatureException(String message) {
super(message);
}
}
}

View File

@ -68,6 +68,33 @@ public abstract class PairHMM {
ARRAY_LOGLESS
}
/* Instruction sets for computing VectorLoglessHMM */
public enum HMM_SUB_IMPLEMENTATION {
/* standard un-vectorized instructions */
UNVECTORIZED(0x0L, false),
/* Streaming SIMD Extensions (SSE), version 4.1 */
SSE41(0x1L, true),
/* Streaming SIMD Extensions (SSE), version 4.2 */
SSE42(0x2L, true),
/* Advanced Vector Extensions (AVX) */
AVX(0x4L, true),
/* For testing only, set bit beyond hardware capabilities */
TEST_BEYOND_CAPABILITIES(0x400L, true),
/* Enable all implementations */
ENABLE_ALL(0xFFFFFFFFFFFFFFFFl, false);
/* Masks for machine capabilities */
private final long mask;
/* Is a specific hardware instruction set requested? */
private final boolean isSpecificHardwareRequest;
HMM_SUB_IMPLEMENTATION(long mask, boolean isSpecificHardwareRequest) {
this.mask = mask;
this.isSpecificHardwareRequest = isSpecificHardwareRequest;
}
long getMask() { return mask; }
boolean getIsSpecificHardwareRequest() { return isSpecificHardwareRequest; }
}
protected int maxHaplotypeLength, maxReadLength;
protected int paddedMaxReadLength, paddedMaxHaplotypeLength;
protected int paddedReadLength, paddedHaplotypeLength;
@ -93,8 +120,9 @@ public abstract class PairHMM {
*
* @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM
* @param readMaxLength the max length of reads we want to use with this PairHMM
* @throws IllegalArgumentException if readMaxLength or haplotypeMaxLength is less than or equal to zero
*/
public void initialize( final int readMaxLength, final int haplotypeMaxLength ) {
public void initialize( final int readMaxLength, final int haplotypeMaxLength ) throws IllegalArgumentException {
if ( readMaxLength <= 0 ) throw new IllegalArgumentException("READ_MAX_LENGTH must be > 0 but got " + readMaxLength);
if ( haplotypeMaxLength <= 0 ) throw new IllegalArgumentException("HAPLOTYPE_MAX_LENGTH must be > 0 but got " + haplotypeMaxLength);
@ -178,8 +206,6 @@ public abstract class PairHMM {
* conditional to {@code alleles[a]}.
* @param gcp penalty for gap continuations base array map for processed reads.
*
* @throws IllegalArgumentException
*
* @return never {@code null}.
*/
public void computeLikelihoods(final ReadLikelihoods.Matrix<Haplotype> likelihoods,
@ -248,6 +274,10 @@ public abstract class PairHMM {
* @param overallGCP the phred-scaled gap continuation penalties scores of read. Must be the same length as readBases
* @param recacheReadValues if false, we don't recalculate any cached results, assuming that readBases and its associated
* parameters are the same, and only the haplotype bases are changing underneath us
* @throws IllegalStateException if did not call initialize() beforehand
* @throws IllegalArgumentException haplotypeBases is null or greater than maxHaplotypeLength
* @throws IllegalArgumentException readBases is null or greater than maxReadLength
* @throws IllegalArgumentException readBases, readQuals, insertionGOP, deletionGOP and overallGCP are not the same size
* @return the log10 probability of read coming from the haplotype under the provided error model
*/
protected final double computeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
@ -257,7 +287,7 @@ public abstract class PairHMM {
final byte[] deletionGOP,
final byte[] overallGCP,
final boolean recacheReadValues,
final byte[] nextHaploytpeBases) {
final byte[] nextHaploytpeBases) throws IllegalStateException, IllegalArgumentException {
if ( ! initialized ) throw new IllegalStateException("Must call initialize before calling computeReadLikelihoodGivenHaplotypeLog10");
if ( haplotypeBases == null ) throw new IllegalArgumentException("haplotypeBases cannot be null");
@ -318,9 +348,10 @@ public abstract class PairHMM {
*
* @param haplotype1 the first haplotype1
* @param haplotype2 the second haplotype1
* @throws IllegalArgumentException if haplotype1 or haplotype2 are null or zero length
* @return the index of the first position in haplotype1 and haplotype2 where the byte isn't the same
*/
public static int findFirstPositionWhereHaplotypesDiffer(final byte[] haplotype1, final byte[] haplotype2) {
public static int findFirstPositionWhereHaplotypesDiffer(final byte[] haplotype1, final byte[] haplotype2) throws IllegalArgumentException {
if ( haplotype1 == null || haplotype1.length == 0 ) throw new IllegalArgumentException("Haplotype1 is bad " + Arrays.toString(haplotype1));
if ( haplotype2 == null || haplotype2.length == 0 ) throw new IllegalArgumentException("Haplotype2 is bad " + Arrays.toString(haplotype2));