diff --git a/build.xml b/build.xml
index b9fdd5dbe..80627fae0 100644
--- a/build.xml
+++ b/build.xml
@@ -28,6 +28,7 @@
+
@@ -44,11 +45,11 @@
-
-
+
+
-
-
+
+
@@ -101,7 +102,7 @@
-
+
@@ -126,14 +127,14 @@
-
+
-
+
-
+
@@ -227,6 +228,10 @@
+
+
+
+
@@ -250,7 +255,7 @@
-
+
@@ -285,7 +290,7 @@
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
description="compile the GATK source" />
-
+
@@ -297,7 +302,16 @@
-
+
+
+
+
+
+
+
+
+
+
@@ -310,7 +324,7 @@
+ description="create GATK contracts" if="include.contracts" />
@@ -448,7 +462,7 @@
-
+
@@ -659,7 +673,7 @@
-
+
-
-
+
@@ -815,7 +831,7 @@
-
+
@@ -823,7 +839,7 @@
-
+
@@ -916,8 +932,8 @@
-
-
+
+
@@ -939,7 +955,7 @@
-
+
@@ -964,7 +980,7 @@
-
+
diff --git a/ivy.xml b/ivy.xml
index 10e4ee570..3f3d1c97f 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -63,6 +63,10 @@
+
+
+
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
index ff59c9e29..2cbc66e31 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
@@ -43,9 +43,9 @@ public class AlleleCount extends VariantStratifier {
if (eval != null) {
int AC = -1;
- if ( eval.hasAttribute("AC") )
+ if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
AC = eval.getAttributeAsInt("AC");
- else if ( eval.isVariant() ) {
+ } else if ( eval.isVariant() ) {
for (Allele allele : eval.getAlternateAlleles())
AC = Math.max(AC, eval.getChromosomeCount(allele));
} else
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index 5f35c182c..ddeda1699 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -82,19 +82,11 @@ public class VariantDataManager {
}
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
- if( annotationKeys.get(iii).toLowerCase().contains("ranksum") ) { // BUGBUG: to clean up
- for( final VariantDatum datum : data ) {
- if( datum.annotations[iii] > 0.0 ) { datum.annotations[iii] /= 3.0; }
- }
- }
meanVector[iii] = theMean;
varianceVector[iii] = theSTD;
for( final VariantDatum datum : data ) {
+ // Transform each data point via: (x - mean) / standard deviation
datum.annotations[iii] = ( datum.isNull[iii] ? GenomeAnalysisEngine.getRandomGenerator().nextGaussian() : ( datum.annotations[iii] - theMean ) / theSTD );
- // Each data point is now [ (x - mean) / standard deviation ]
- if( annotationKeys.get(iii).toLowerCase().contains("ranksum") && datum.isNull[iii] && datum.annotations[iii] > 0.0 ) {
- datum.annotations[iii] /= 3.0;
- }
}
}
if( foundZeroVarianceAnnotation ) {
@@ -163,7 +155,7 @@ public class VariantDataManager {
final int numBadSitesAdded = trainingData.size();
logger.info( "Found " + numBadSitesAdded + " variants overlapping bad sites training tracks." );
- // Next, sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants
+ // Next sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants
Collections.sort( data );
final int numToAdd = Math.max( minimumNumber - trainingData.size(), Math.round((float)bottomPercentage * data.size()) );
if( numToAdd > data.size() ) {
@@ -241,23 +233,15 @@ public class VariantDataManager {
double value;
try {
- if( annotationKey.equalsIgnoreCase("QUAL") ) {
- value = vc.getPhredScaledQual();
- } else if( annotationKey.equalsIgnoreCase("DP") ) {
- value = Double.parseDouble( (String)vc.getAttribute( "DP" ) ) / Double.parseDouble( (String)vc.getAttribute( "AN" ) );
- } else {
- value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
- if( Double.isInfinite(value) ) { value = Double.NaN; }
- if( annotationKey.equalsIgnoreCase("InbreedingCoeff") && value > 0.05 ) { value = Double.NaN; }
- if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
- value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
- }
- if( annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
- if( annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.01) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
+ value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
+ if( Double.isInfinite(value) ) { value = Double.NaN; }
+ if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
+ value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
}
-
+ if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
+ if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
} catch( Exception e ) {
- value = Double.NaN; // The VQSR works with missing data now by marginalizing over the missing dimension when evaluating Gaussians
+ value = Double.NaN; // The VQSR works with missing data by marginalizing over the missing dimension when evaluating the Gaussian mixture model
}
return value;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index 2c51f02d6..2d0355d7d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -284,7 +284,7 @@ public class VariantRecalibrator extends RodWalker newAlleles = new ArrayList();
- loc = clipAlleles(pos, ref, alleles, newAlleles);
+ loc = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
alleles = newAlleles;
}
@@ -504,7 +504,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @param clippedAlleles output list of clipped alleles
* @return a list of alleles, clipped to the reference
*/
- protected static long clipAlleles(long position, String ref, List unclippedAlleles, List clippedAlleles) {
+ protected static long clipAlleles(long position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) {
// Note that the computation of forward clipping here is meant only to see whether there is a common
// base to all alleles, and to correctly compute reverse clipping,
@@ -522,6 +522,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
}
if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0)
clipping = false;
+ else if (ref.length() == reverseClipped)
+ generateException("bad alleles encountered", lineNo);
else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1])
clipping = false;
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
index cb6557408..31791e805 100755
--- a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java
@@ -133,8 +133,12 @@ public class Haplotype {
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
+ int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
+ // protect against long events that overrun available reference context
+ if (startAfter > refBases.length)
+ startAfter = refBases.length;
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
- startIdxInReference+numPrefBases+ refAllele.getBases().length, refBases.length);
+ startAfter, refBases.length);
// Create location for all haplotypes
diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java
index 61bb8b34b..b469c8a41 100755
--- a/public/java/test/org/broadinstitute/sting/BaseTest.java
+++ b/public/java/test/org/broadinstitute/sting/BaseTest.java
@@ -12,6 +12,10 @@ import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
/**
*
@@ -107,6 +111,57 @@ public abstract class BaseTest {
}
}
+ /**
+ * Simple generic utility class for creating TestNG data providers:
+ *
+ * 1: inherit this class, as in
+ *
+ * private class SummarizeDifferenceTest extends TestDataProvider {
+ * public SummarizeDifferenceTest() {
+ * super(SummarizeDifferenceTest.class);
+ * }
+ * ...
+ * }
+ *
+ * Provide a reference to your class to the TestDataProvider constructor.
+ *
+ * 2: Create instances of your subclass inside a data provider method, and return the
+ * result of getTests, passing it the class type of your test
+ *
+ * @DataProvider(name = "summaries")
+ * public Object[][] createSummaries() {
+ * new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
+ * new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
+ * return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
+ * }
+ *
+ * This class magically tracks the created objects, keyed by the class given to the constructor
+ */
+ public static class TestDataProvider {
+ private static final Map> tests = new HashMap>();
+
+ /**
+ * Create a new TestDataProvider instance bound to the class c
+ * @param c
+ */
+ public TestDataProvider(Class c) {
+ if ( ! tests.containsKey(c) )
+ tests.put(c, new ArrayList