diff --git a/build.xml b/build.xml
index 21066686f..a99730f0e 100644
--- a/build.xml
+++ b/build.xml
@@ -69,8 +69,6 @@
-
-
@@ -146,11 +144,7 @@
-
-
-
-
-
@@ -178,13 +172,23 @@
-
-
+
+
+
+
+
-
+
+
+
+
+
+
+
+
@@ -214,12 +218,6 @@
-
-
-
-
-
-
@@ -357,7 +355,6 @@
-
@@ -374,7 +371,6 @@
-
@@ -464,7 +460,7 @@
-
+
@@ -682,7 +678,6 @@
-
diff --git a/ivy.xml b/ivy.xml
index c2a6c4ccd..10e4ee570 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -48,6 +48,9 @@
+
+
+
diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
index 3cf43c15a..f8e298d88 100755
--- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@@ -61,7 +61,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
- private String PATH_TO_RESOURCES = "R/";
+ private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 1045ca371..055eb0b97 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -92,25 +92,31 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000;
+ @Hidden
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
public double INDEL_GAP_CONTINUATION_PENALTY = 10.0;
+ @Hidden
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
public double INDEL_GAP_OPEN_PENALTY = 45.0;
+ @Hidden
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
public int INDEL_HAPLOTYPE_SIZE = 80;
+ @Hidden
@Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
//gdebug+
- @Hidden
// experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
+ @Hidden
@Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
public boolean GET_GAP_PENALTIES_FROM_DATA = false;
+ @Hidden
@Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
+ @Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
@Hidden
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java
index aebb0e3eb..df1f4f908 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java
@@ -5,6 +5,7 @@ import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.*;
@@ -113,9 +114,10 @@ public class ConstrainedMateFixingManager {
HashMap forMateMatching = new HashMap();
TreeSet waitingReads = new TreeSet(comparer);
- private T remove(TreeSet treeSet) {
- final T first = treeSet.first();
- treeSet.remove(first);
+ private SAMRecord remove(TreeSet treeSet) {
+ final SAMRecord first = treeSet.first();
+ if ( !treeSet.remove(first) )
+ throw new UserException("Error caching SAM record " + first.getReadName() + ", which is usually caused by malformed SAM/BAM files in which multiple identical copies of a read are present.");
return first;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
index 02d850211..d4e07dccf 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 The Broad Institute
+ * Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java
index a09a30145..17461de2f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java
@@ -1,3 +1,28 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import Jama.Matrix;
@@ -72,7 +97,7 @@ public class GaussianMixtureModel {
int ttt = 0;
while( ttt++ < numIterations ) {
- // Estep: assign each variant to the nearest cluster
+ // E step: assign each variant to the nearest cluster
for( final VariantDatum datum : data ) {
double minDistance = Double.MAX_VALUE;
MultivariateGaussian minGaussian = null;
@@ -87,7 +112,7 @@ public class GaussianMixtureModel {
datum.assignment = minGaussian;
}
- // Mstep: update gaussian means based on assigned variants
+ // M step: update gaussian means based on assigned variants
for( final MultivariateGaussian gaussian : gaussians ) {
gaussian.zeroOutMu();
int numAssigned = 0;
@@ -204,26 +229,29 @@ public class GaussianMixtureModel {
}
public double evaluateDatumMarginalized( final VariantDatum datum ) {
- int numVals = 0;
+ int numSamples = 0;
double sumPVarInGaussian = 0.0;
- int numIter = 10;
+ final int numIterPerMissingAnnotation = 10; // Trade off here between speed of computation and accuracy of the marginalization
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
+ // for each dimension
for( int iii = 0; iii < datum.annotations.length; iii++ ) {
- // marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
+ // if it is missing marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
if( datum.isNull[iii] ) {
- for( int ttt = 0; ttt < numIter; ttt++ ) {
- datum.annotations[iii] = Normal.staticNextDouble(0.0, 1.0);
+ for( int ttt = 0; ttt < numIterPerMissingAnnotation; ttt++ ) {
+ datum.annotations[iii] = GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); // draw a random sample from the standard normal distribution
+ // evaluate this random data point
int gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
}
- sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10));
- numVals++;
+ // add this sample's probability to the pile in order to take an average in the end
+ sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10)); // p = 10 ^ Sum(pi_k * p(v|n,k))
+ numSamples++;
}
}
}
- return Math.log10( sumPVarInGaussian / ((double) numVals) );
+ return Math.log10( sumPVarInGaussian / ((double) numSamples) );
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java
index 0b2edfd10..d077af78e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java
@@ -1,3 +1,28 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import Jama.Matrix;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java
index 9bbcf395a..6c1a7ddbc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java
@@ -1,3 +1,28 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java
index fbee64fe2..64fe36637 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 The Broad Institute
+ * Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
index 08388db21..19c6d501b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
@@ -1,3 +1,28 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java
index 2914385a4..5deb5d8c2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java
@@ -1,8 +1,32 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantQualityScore;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index f83e9b2f0..5f35c182c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -1,6 +1,30 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
-import cern.jet.random.Normal;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@@ -66,7 +90,7 @@ public class VariantDataManager {
meanVector[iii] = theMean;
varianceVector[iii] = theSTD;
for( final VariantDatum datum : data ) {
- datum.annotations[iii] = ( datum.isNull[iii] ? Normal.staticNextDouble(0.0, 1.0) : ( datum.annotations[iii] - theMean ) / theSTD );
+ datum.annotations[iii] = ( datum.isNull[iii] ? GenomeAnalysisEngine.getRandomGenerator().nextGaussian() : ( datum.annotations[iii] - theMean ) / theSTD );
// Each data point is now [ (x - mean) / standard deviation ]
if( annotationKeys.get(iii).toLowerCase().contains("ranksum") && datum.isNull[iii] && datum.annotations[iii] > 0.0 ) {
datum.annotations[iii] /= 3.0;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java
index 04a5a9d3e..eb9e98fcb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java
@@ -1,3 +1,28 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index b903d20af..2c51f02d6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -88,7 +88,7 @@ public class VariantRecalibrator extends RodWalker(Arrays.asList(USE_ANNOTATIONS)), VRAC );
if( IGNORE_INPUT_FILTERS != null ) {
@@ -228,19 +229,23 @@ public class VariantRecalibrator extends RodWalker reduceSum ) {
dataManager.setData( reduceSum );
dataManager.normalizeData(); // Each data point is now (x - mean) / standard deviation
+
+ // Generate the positive model using the training data and evaluate each variant
final GaussianMixtureModel goodModel = engine.generateModel( dataManager.getTrainingData() );
engine.evaluateData( dataManager.getData(), goodModel, false );
+
+ // Generate the negative model using the worst performing data and evaluate each variant contrastively
final GaussianMixtureModel badModel = engine.generateModel( dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS ) );
engine.evaluateData( dataManager.getData(), badModel, true );
engine.calculateWorstPerformingAnnotation( dataManager.getData(), goodModel, badModel );
- final ExpandingArrayList randomData = dataManager.getRandomDataForPlotting( 6000 );
-
+ // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
final List tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
tranchesStream.print(Tranche.tranchesString( tranches ));
+ // Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
double lodCutoff = 0.0;
for( final Tranche tranche : tranches ) {
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
@@ -252,7 +257,7 @@ public class VariantRecalibrator extends RodWalker newAlleles = new ArrayList();
- loc = clipAlleles(pos, ref, alleles, newAlleles);
+ loc = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
alleles = newAlleles;
}
@@ -504,7 +504,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @param clippedAlleles output list of clipped alleles
* @return a list of alleles, clipped to the reference
*/
- protected static long clipAlleles(long position, String ref, List unclippedAlleles, List clippedAlleles) {
+ protected static long clipAlleles(long position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) {
// Note that the computation of forward clipping here is meant only to see whether there is a common
// base to all alleles, and to correctly compute reverse clipping,
@@ -522,6 +522,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
}
if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0)
clipping = false;
+ else if (ref.length() == reverseClipped)
+ generateException("bad alleles encountered", lineNo);
else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1])
clipping = false;
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index 3d375aba2..5787b591f 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -1343,6 +1343,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return (int)stop;
}
+ private boolean hasSymbolicAlleles() {
+ for (Allele a: getAlleles()) {
+ if (a.isSymbolic()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte inputRefBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
Allele refAllele = inputVC.getReference();
@@ -1352,7 +1361,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
- if (refAllele.length() == locLength)
+ if (inputVC.hasSymbolicAlleles())
+ padVC = true;
+ else if (refAllele.length() == locLength)
padVC = false;
else if (refAllele.length() == locLength-1)
padVC = true;
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
index d71c6ae5d..a961beca1 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
@@ -19,10 +19,7 @@ import org.broadinstitute.sting.gatk.phonehome.GATKRunReport
class MethodsDevelopmentCallingPipeline extends QScript {
qscript =>
- @Argument(shortName="gatk", doc="gatk jar file", required=true)
- var gatkJarFile: File = _
-
- @Argument(shortName="outputDir", doc="output directory", required=true)
+ @Argument(shortName="outputDir", doc="output directory", required=false)
var outputDir: String = "./"
@Argument(shortName="skipCalling", doc="skip the calling part of the pipeline and only run VQSR on preset, gold standard VCF files", required=false)
@@ -185,7 +182,6 @@ class MethodsDevelopmentCallingPipeline extends QScript {
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
logging_level = "INFO";
- jarFile = gatkJarFile;
memoryLimit = 4;
phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3
}
diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml
index 1e47fa847..2c8fc388f 100644
--- a/settings/ivysettings.xml
+++ b/settings/ivysettings.xml
@@ -25,5 +25,6 @@
+
diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar
new file mode 100644
index 000000000..dc77c7d33
Binary files /dev/null and b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2-sources.jar differ
diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar
new file mode 100644
index 000000000..f267be4b5
Binary files /dev/null and b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.jar differ
diff --git a/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml
new file mode 100644
index 000000000..c6a8da052
--- /dev/null
+++ b/settings/repository/net.sf.gridscheduler/drmaa-6.2u5p2.xml
@@ -0,0 +1,3 @@
+
+
+