diff --git a/ivy.xml b/ivy.xml
index b197d0714..6dd5be7a8 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -52,7 +52,7 @@
-
+
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
index 9eca81852..d714ca185 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
@@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.EventType;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java
index 9ee1a4634..3b9fc0390 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java
@@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
index 864414de9..8e4ca9595 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
@@ -68,10 +68,10 @@ public class ErrorModel {
break;
}
}
+ haplotypeMap = new LinkedHashMap();
if (refSampleVC.isIndel()) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
- haplotypeMap = new LinkedHashMap();
indelLikelihoodMap = new HashMap>();
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
}
@@ -96,7 +96,8 @@ public class ErrorModel {
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
- perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
+ if (!haplotypeMap.isEmpty())
+ perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
}
int idx = 0;
for (PileupElement refPileupElement : refSamplePileup) {
@@ -108,7 +109,7 @@ public class ErrorModel {
if (DEBUG) System.out.println(m);
isMatch |= m;
}
- if (refSampleVC.isIndel()) {
+ if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
if (!isInformativeElement(perAlleleLikelihoods))
@@ -173,10 +174,10 @@ public class ErrorModel {
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
if (allele.isReference()) {
// for a ref allele, any base mismatch or new indel is a mismatch.
- if(allele.getBases().length>0 )
+ if(allele.getBases().length>0)
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
// could clean up/simplify this when unpadding is removed
- return (pileupElement.getBase() == refBase);
+ return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
else
// either null allele to compare, or ref/alt lengths are different (indel by definition).
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java
similarity index 95%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModel.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java
index b8be24cad..ba19638e0 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java
@@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
-public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
+public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC;
@@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false;
- protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
+ protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
ploidy = UAC.samplePloidy;
this.UAC = UAC;
@@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
- final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
+ final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0)
@@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
return newPool;
}
- // todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
+ // todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
/**
*
* @param set ExactACset holding conformation to be computed
@@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
continue;
- PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
+ GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
}
@@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
- PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
+ GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next();
}
- PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
+ GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone();
while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
@@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
- final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
+ final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
@@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
// for current conformation, get all possible ways to break vector K into two components G1 and G2
- final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
+ final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
while (innerIterator.hasNext()) {
// check if breaking current conformation into g1 and g2 is feasible.
@@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( numOriginalAltAlleles == numNewAltAlleles) {
newLikelihoods = originalLikelihoods;
} else {
- newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
+ newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
@@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// find the genotype with maximum likelihoods
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
- final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
+ final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList alleleFreqs = new ArrayList();
final ArrayList alleleCounts = new ArrayList();
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
similarity index 97%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoods.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
index 438acbacd..6b0831323 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
@@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import java.util.*;
-public abstract class PoolGenotypeLikelihoods {
+public abstract class GeneralPloidyGenotypeLikelihoods {
protected final int numChromosomes;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {
private static final boolean FAST_GL_COMPUTATION = true;
// constructor with given logPL elements
- public PoolGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy,
- final HashMap perLaneErrorModels, final boolean ignoreLaneInformation) {
+ public GeneralPloidyGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy,
+ final HashMap perLaneErrorModels, final boolean ignoreLaneInformation) {
this.alleles = alleles;
this.nAlleles = alleles.size();
numChromosomes = ploidy;
@@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
} else {
if (logLikelihoods.length != likelihoodDim)
- throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
+ throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
}
@@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
final int numAlleles = currentState.length;
final int ploidy = restrictSumTo;
- linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
+ linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
}
else
throw new ReviewedStingException("BUG: Not supported");
@@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
final List originalAlleles, final List allelesToSubset) {
- int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
+ int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
double[] newPLs = new double[newPLSize];
@@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
newCount[idx] = pVec[permutationKey[idx]];
// get corresponding index from new count
- int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
+ int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
newPLs[outputIdx] = pl;
if (VERBOSE) {
System.out.println("Old Key:"+Arrays.toString(pVec));
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
similarity index 94%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
index 685091678..f6ce818be 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
@@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
-public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
+public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
//protected Set laneIDs;
public enum Model {
@@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
final protected UnifiedArgumentCollection UAC;
- protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
+ protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC,logger);
this.UAC = UAC;
@@ -137,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
protected static class PoolGenotypeData {
public final String name;
- public final PoolGenotypeLikelihoods GL;
+ public final GeneralPloidyGenotypeLikelihoods GL;
public final int depth;
public final List alleles;
- public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List alleles) {
+ public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List alleles) {
this.name = name;
this.GL = GL;
this.depth = depth;
@@ -236,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
// create the GenotypeLikelihoods object
- final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
+ final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
// actually compute likelihoods
final int nGoodBases = GL.add(pileup, UAC);
if ( nGoodBases > 0 )
@@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array
- final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
+ final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
allAlleles, alleles);
// normalize in log space so that max element is zero.
@@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
Abstract methods - must be implemented in derived classes
*/
- protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles,
+ protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap perLaneErrorModels,
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
similarity index 92%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
index 33b7b8b90..4f42f820e 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
@@ -18,7 +18,7 @@ import java.util.*;
* Time: 10:06 AM
* To change this template use File | Settings | File Templates.
*/
-public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
+public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
final PairHMMIndelErrorModel pairModel;
final LinkedHashMap haplotypeMap;
final ReferenceContext refContext;
@@ -27,14 +27,14 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
final byte refBase;
- public PoolIndelGenotypeLikelihoods(final List alleles,
- final double[] logLikelihoods,
- final int ploidy,
- final HashMap perLaneErrorModels,
- final boolean ignoreLaneInformation,
- final PairHMMIndelErrorModel pairModel,
- final LinkedHashMap haplotypeMap,
- final ReferenceContext referenceContext) {
+ public GeneralPloidyIndelGenotypeLikelihoods(final List alleles,
+ final double[] logLikelihoods,
+ final int ploidy,
+ final HashMap perLaneErrorModels,
+ final boolean ignoreLaneInformation,
+ final PairHMMIndelErrorModel pairModel,
+ final LinkedHashMap haplotypeMap,
+ final ReferenceContext referenceContext) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.pairModel = pairModel;
this.haplotypeMap = haplotypeMap;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
similarity index 89%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
index 1fef76116..f6559f666 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
@@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
-public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
+public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel;
@@ -59,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
*/
- protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
+ protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
super(UAC, logger);
@@ -69,14 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
- protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles,
+ protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation){
- return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
+ return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
}
protected List getInitialAllelesToUse(final RefMetaDataTracker tracker,
@@ -96,6 +94,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
haplotypeMap.clear();
}
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
+
+ // sanity check: if haplotype map couldn't be created, clear allele list
+ if (haplotypeMap.isEmpty())
+ alleles.clear();
return alleles;
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
similarity index 94%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
index 1e445270f..944372907 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
@@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -22,7 +23,7 @@ import static java.lang.Math.pow;
* and posteriors given a pile of bases and quality scores
*
*/
-public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
+public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
final List myAlleles;
final int[] alleleIndices;
@@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
* @param useBQAedPileup Use BAQed pileup
* @param ignoreLaneInformation If true, lane info is ignored
*/
- public PoolSNPGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy,
- final HashMap perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
+ public GeneralPloidySNPGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy,
+ final HashMap perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.useBAQedPileup = useBQAedPileup;
myAlleles = new ArrayList(alleles);
- refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref!
+ Allele refAllele = alleles.get(0);
+ //sanity check: by construction, first allele should ALWAYS be the reference alleles
+ if (!refAllele.isReference())
+ throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
+
+ refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
if (myAlleles.size() < BaseUtils.BASES.length) {
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
similarity index 91%
rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoodsCalculationModel.java
rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
index 61f505445..30d614455 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
@@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
-public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
+public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
- protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) {
+ protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
}
- protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles,
+ protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation) {
- return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
+ return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}
protected List getInitialAllelesToUse(final RefMetaDataTracker tracker,
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 6bbfb0391..1ea7354cf 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -241,6 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem
Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
+ UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java
similarity index 96%
rename from protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModelUnitTest.java
rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java
index 5a6f7df0f..983f562d2 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModelUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java
@@ -19,7 +19,7 @@ import java.util.Arrays;
* Time: 7:44 AM
* To change this template use File | Settings | File Templates.
*/
-public class PoolAFCalculationModelUnitTest extends BaseTest {
+public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
@@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
- final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
+ final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors
- PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
+ GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
similarity index 90%
rename from protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java
rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
index 74abb6b11..f95ba66b2 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
@@ -41,7 +41,7 @@ import java.io.PrintStream;
import java.util.*;
-public class PoolGenotypeLikelihoodsUnitTest {
+public class GeneralPloidyGenotypeLikelihoodsUnitTest {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
@@ -60,7 +60,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
public void testStoringLikelihoodElements() {
- // basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
+ // basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
int ploidy = 20;
int numAlleles = 4;
@@ -78,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int k=0; k < gls.length; k++)
gls[k]= (double)k;
- PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
+ GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
double[] glnew = gl.getLikelihoods();
Assert.assertEquals(gls, glnew);
@@ -90,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int ploidy = 2; ploidy < 10; ploidy++) {
for (int nAlleles = 2; nAlleles < 10; nAlleles++)
- Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
+ Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
}
@@ -102,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create iterator, compare linear index given by iterator with closed form function
int numAlleles = 4;
int ploidy = 2;
- PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
+ GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
while(iterator.hasNext()) {
System.out.format("\n%d:",iterator.getLinearIndex());
@@ -111,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
System.out.format("%d ",aa);
- int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
+ int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
System.out.format("Computed idx = %d\n",computedIdx);
iterator.next();
}
@@ -140,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
allelesToSubset.add(Allele.create("A",false));
allelesToSubset.add(Allele.create("C",false));
- double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
+ double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
originalAlleles, allelesToSubset);
@@ -170,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testIndexIterator() {
int[] seed = new int[]{1,2,3,4};
- PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
+ GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
@@ -228,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
- private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
- PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
+ private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
+ GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
while(iterator.hasNext()) {
int[] a = iterator.getCurrentVector();
- int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
+ int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
if (VERBOSE) {
System.out.format("%d:",iterator.getLinearIndex());
for (int i=0; i < seed.length; i++)
@@ -289,13 +289,11 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
- // TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
- // TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
- @Test (enabled = false)
+ @Test
public void testIndelErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte refByte = refPileupTestProvider.getRefByte();
- final String altBases = (char)refByte + "TCA";
+ final String altBases = "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List trueAlleles = new ArrayList();
trueAlleles.add(Allele.create(refByte, true));
@@ -316,7 +314,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise
// CASE 1: Test HET insertion
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
- Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
+ Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
@@ -393,6 +391,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte refByte = readPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
+
final List allAlleles = new ArrayList(); // this contains only ref Allele up to now
final Set laneIDs = new TreeSet();
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
@@ -409,23 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (String laneID : laneIDs)
noisyErrorModels.put(laneID, Q30ErrorModel);
- final int refIdx = 0;
- int altIdx = 2;
-
- // ref allele must be first
- allAlleles.add(Allele.create(refByte, true));
+ // all first ref allele
+ allAlleles.add(Allele.create(refByte,true));
for (byte b: BaseUtils.BASES) {
- if (refByte != b) {
- if (b == altByte)
- altIdx = allAlleles.size();
+ if (refByte != b)
allAlleles.add(Allele.create(b, false));
- }
}
+ final int refIdx = 0;
+ int altIdx = -1;
+
+ for (int k=0; k < allAlleles.size(); k++)
+ if (altByte == allAlleles.get(k).getBases()[0]) {
+ altIdx = k;
+ break;
+ }
+
+
+
PrintStream out = null;
if (SIMULATE_NOISY_PILEUP) {
try {
- out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
+ out = new PrintStream(new File("GLUnitTest.table"));
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
}
catch (Exception e) {}
@@ -449,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
- final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
+ final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
if (VERBOSE) {
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
@@ -478,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
- final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
+ final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
mlPair = noisyGL.getMostLikelyACCount();
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
similarity index 76%
rename from protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java
rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
index c6b1a8b7f..9b3103274 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
@@ -12,33 +12,33 @@ import org.testng.annotations.Test;
* Time: 11:28 AM
* To change this template use File | Settings | File Templates.
*/
-public class PoolCallerIntegrationTest extends WalkerTest {
+public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
final static String REF = b37KGReference;
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
final String REFSAMPLE_NAME = "NA12878";
- final String MTINTERVALS = "MT";
+ final String MTINTERVALS = "MT:1-3000";
final String LSVINTERVALS = "20:40,000,000-41,000,000";
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
private void PC_MT_Test(String bam, String args, String name, String md5) {
- final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
+ final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test(String args, String name, String model, String md5) {
- final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
+ final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
- final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
+ final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
@@ -46,33 +46,33 @@ public class PoolCallerIntegrationTest extends WalkerTest {
@Test
public void testBOTH_GGA_Pools() {
- PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
+ PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d");
}
@Test
public void testINDEL_GGA_Pools() {
- PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
+ PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3");
}
@Test
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
- PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706");
+ PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b");
}
@Test
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
- PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29");
+ PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9");
}
@Test
public void testMT_SNP_DISCOVERY_sp4() {
- PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
+ PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7");
}
@Test
public void testMT_SNP_GGA_sp10() {
- PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
+ PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a");
}
}
diff --git a/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
similarity index 99%
rename from public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R
rename to public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
index fd301cd97..6c4dace1d 100644
--- a/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R
+++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
@@ -1,4 +1,5 @@
library("ggplot2")
+library("tools") #For compactPDF in R 2.13+
args <- commandArgs(TRUE)
data <- read.csv(args[1])
diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
index 306ebdd0e..312d31727 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@@ -130,12 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 )
- exitSystemWithUserError(new UserException(t.getCause().getMessage()));
+ exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
- if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
+ if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index eab9fde79..56fcf0652 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -274,6 +274,38 @@ public class GenomeAnalysisEngine {
//return result;
}
+ // TODO -- Let's move this to a utility class in unstable - but which one?
+ // **************************************************************************************
+ // * Handle Deprecated Walkers *
+ // **************************************************************************************
+
+ // Mapping from walker name to major version number where the walker first disappeared
+ private static Map deprecatedGATKWalkers = new HashMap();
+ static {
+ deprecatedGATKWalkers.put("CountCovariates", "2.0");
+ deprecatedGATKWalkers.put("TableRecalibration", "2.0");
+ }
+
+ /**
+ * Utility method to check whether a given walker has been deprecated in a previous GATK release
+ *
+ * @param walkerName the walker class name (not the full package) to check
+ */
+ public static boolean isDeprecatedWalker(final String walkerName) {
+ return deprecatedGATKWalkers.containsKey(walkerName);
+ }
+
+ /**
+ * Utility method to check whether a given walker has been deprecated in a previous GATK release
+ *
+ * @param walkerName the walker class name (not the full package) to check
+ */
+ public static String getDeprecatedMajorVersionNumber(final String walkerName) {
+ return deprecatedGATKWalkers.get(walkerName);
+ }
+
+ // **************************************************************************************
+
/**
* Retrieves an instance of the walker based on the walker name.
*
@@ -287,6 +319,9 @@ public class GenomeAnalysisEngine {
if ( isGATKLite() && GATKLiteUtils.isAvailableOnlyInFullGATK(walkerName) ) {
e = new UserException.NotSupportedInGATKLite("the " + walkerName + " walker is available only in the full version of the GATK");
}
+ else if ( isDeprecatedWalker(walkerName) ) {
+ e = new UserException.DeprecatedWalker(walkerName, getDeprecatedMajorVersionNumber(walkerName));
+ }
throw e;
}
}
@@ -762,6 +797,14 @@ public class GenomeAnalysisEngine {
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
+ if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
+ throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
+
+ boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
+
+ if (argCollection.keepProgramRecords)
+ removeProgramRecords = false;
+
return new SAMDataSource(
samReaderIDs,
threadAllocation,
@@ -778,7 +821,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(),
refReader,
getBaseRecalibration(),
- argCollection.defaultBaseQualities);
+ argCollection.defaultBaseQualities,
+ removeProgramRecords);
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 3fd3857c5..972116952 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -249,6 +249,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
+ @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
+ public boolean removeProgramRecords = false;
+
+ @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
+ public boolean keepProgramRecords = false;
+
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 0fa4234b3..7f0a0c4c0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -89,6 +89,11 @@ public class SAMDataSource {
*/
private final SAMFileReader.ValidationStringency validationStringency;
+ /**
+ * Do we want to remove the program records from this data source?
+ */
+ private final boolean removeProgramRecords;
+
/**
* Store BAM indices for each reader present.
*/
@@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
- (byte) -1);
+ (byte) -1,
+ false);
}
/**
@@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
- byte defaultBaseQualities) {
+ byte defaultBaseQualities,
+ boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
@@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null;
validationStringency = strictness;
+ this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize);
else {
@@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
- SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
+ SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers);
availableResources.add(readers);
}
@@ -777,9 +785,11 @@ public class SAMDataSource {
/**
* Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load.
+ * TODO: validationStringency is not used here
* @param validationStringency validation stringency.
+ * @param removeProgramRecords indicate whether to clear program records from the readers
*/
- public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) {
+ public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start();
@@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
+ if (removeProgramRecords) {
+ init.reader.getFileHeader().setProgramRecords(new ArrayList());
+ }
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
index cb8786be1..300e801e6 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
@@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage extends Walker {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
index e94d01d5a..2a92d8831 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
@@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
+@RemoveProgramRecords
public abstract class LocusWalker extends Walker {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java
new file mode 100644
index 000000000..d9abc7925
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java
@@ -0,0 +1,21 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: thibault
+ * Date: 8/2/12
+ * Time: 1:58 PM
+ * To change this template use File | Settings | File Templates.
+ */
+
+import java.lang.annotation.*;
+
+/**
+ * Indicates that program records should be removed from SAM headers by default for this walker
+ */
+@Documented
+@Inherited
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface RemoveProgramRecords {
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
index 122958ac2..a6d82d5b3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
@@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Gatherer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
+import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import java.io.File;
import java.io.FileNotFoundException;
@@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer {
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original");
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
- RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
+ RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal");
- RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
+ RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
generalReport.output(outputFile);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
index b2400a49d..512434e6d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
@@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.collections.Pair;
@@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
+import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -58,7 +62,7 @@ import java.util.ArrayList;
* This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating
* only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative
* of poor base quality. This walker generates tables based on various user-specified covariates (such as read group,
- * reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical
+ * reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical
* probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations.
* The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score).
*
@@ -90,7 +94,7 @@ import java.util.ArrayList;
*
Examples
*
* java -Xmx4g -jar GenomeAnalysisTK.jar \
- * -T BaseQualityScoreRecalibrator \
+ * -T BaseRecalibrator \
* -I my_reads.bam \
* -R resources/Homo_sapiens_assembly18.fasta \
* -knownSites bundle/hg18/dbsnp_132.hg18.vcf \
@@ -109,7 +113,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
- private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
+ private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private RecalibrationTables recalibrationTables;
@@ -143,12 +147,12 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
if (RAC.LIST_ONLY) {
- RecalDataManager.listAvailableCovariates(logger);
+ RecalUtils.listAvailableCovariates(logger);
System.exit(0);
}
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
- Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
+ Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList requiredCovariates = covariates.getFirst();
ArrayList optionalCovariates = covariates.getSecond();
@@ -222,17 +226,17 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed
if (readNotSeen(read)) {
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
- RecalDataManager.parsePlatformForRead(read, RAC);
- if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
+ RecalUtils.parsePlatformForRead(read, RAC);
+ if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
continue;
}
- read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
+ read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
}
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
- RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
- RecalDataManager.isColorSpaceConsistent(read, offset))
+ RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
+ RecalUtils.isColorSpaceConsistent(read, offset))
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
}
countedSites++;
@@ -271,13 +275,16 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed
public void onTraversalDone(Long result) {
logger.info("Calculating quantized quality scores...");
quantizeQualityScores();
+
+ logger.info("Writing recalibration report...");
+ generateReport();
+ logger.info("...done!");
+
if (!RAC.NO_PLOTS) {
logger.info("Generating recalibration plots...");
generatePlots();
}
- logger.info("Writing recalibration report...");
- generateReport();
- logger.info("...done!");
+
logger.info("Processed: " + result + " sites");
}
@@ -285,10 +292,10 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
- RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
+ RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
else
- RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
+ RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
@@ -309,7 +316,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
}
- RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
+ RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
deleted file mode 100644
index d7e8e16b5..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
+++ /dev/null
@@ -1,109 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
-
-import org.broadinstitute.sting.utils.QualityUtils;
-
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Jan 6, 2010
- *
- * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
- * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
- */
-
-public class Datum {
-
- long numObservations; // number of bases seen in total
- long numMismatches; // number of bases seen that didn't match the reference
-
- private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
-
- //---------------------------------------------------------------------------------------------------------------
- //
- // constructors
- //
- //---------------------------------------------------------------------------------------------------------------
-
- public Datum() {
- numObservations = 0L;
- numMismatches = 0L;
- }
-
- public Datum(long numObservations, long numMismatches) {
- this.numObservations = numObservations;
- this.numMismatches = numMismatches;
- }
-
- //---------------------------------------------------------------------------------------------------------------
- //
- // increment methods
- //
- //---------------------------------------------------------------------------------------------------------------
-
- synchronized void increment(final long incObservations, final long incMismatches) {
- numObservations += incObservations;
- numMismatches += incMismatches;
- }
-
- synchronized void increment(final boolean isError) {
- numObservations++;
- numMismatches += isError ? 1:0;
- }
-
- //---------------------------------------------------------------------------------------------------------------
- //
- // methods to derive empirical quality score
- //
- //---------------------------------------------------------------------------------------------------------------
-
- double empiricalQualDouble() {
- final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
- final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
- final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
- return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
- }
-
- byte empiricalQualByte() {
- final double doubleMismatches = (double) (numMismatches);
- final double doubleObservations = (double) (numObservations);
- return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
- }
-
- @Override
- public String toString() {
- return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof Datum))
- return false;
- Datum other = (Datum) o;
- return numMismatches == other.numMismatches && numObservations == other.numObservations;
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
deleted file mode 100755
index 9b00b1876..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
-
-/*
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-import com.google.java.contract.Ensures;
-import com.google.java.contract.Requires;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.QualityUtils;
-
-import java.util.Random;
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Nov 3, 2009
- *
- * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
- */
-
-public class RecalDatum extends Datum {
-
- private static final double UNINITIALIZED = -1.0;
-
- private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
- private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
-
-
- //---------------------------------------------------------------------------------------------------------------
- //
- // constructors
- //
- //---------------------------------------------------------------------------------------------------------------
-
- public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
- numObservations = _numObservations;
- numMismatches = _numMismatches;
- estimatedQReported = reportedQuality;
- empiricalQuality = UNINITIALIZED;
- }
-
- public RecalDatum(final RecalDatum copy) {
- this.numObservations = copy.numObservations;
- this.numMismatches = copy.numMismatches;
- this.estimatedQReported = copy.estimatedQReported;
- this.empiricalQuality = copy.empiricalQuality;
- }
-
- public void combine(final RecalDatum other) {
- final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
- increment(other.numObservations, other.numMismatches);
- estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
- empiricalQuality = UNINITIALIZED;
- }
-
- @Override
- public void increment(final boolean isError) {
- super.increment(isError);
- empiricalQuality = UNINITIALIZED;
- }
-
- @Requires("empiricalQuality == UNINITIALIZED")
- @Ensures("empiricalQuality != UNINITIALIZED")
- protected final void calcEmpiricalQuality() {
- empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
- }
-
- public void setEstimatedQReported(final double estimatedQReported) {
- this.estimatedQReported = estimatedQReported;
- }
-
- public final double getEstimatedQReported() {
- return estimatedQReported;
- }
-
- public void setEmpiricalQuality(final double empiricalQuality) {
- this.empiricalQuality = empiricalQuality;
- }
-
- public final double getEmpiricalQuality() {
- if (empiricalQuality == UNINITIALIZED)
- calcEmpiricalQuality();
- return empiricalQuality;
- }
-
- @Override
- public String toString() {
- return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
- }
-
- public String stringForCSV() {
- return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
- }
-
- private double calcExpectedErrors() {
- return (double) this.numObservations * qualToErrorProb(estimatedQReported);
- }
-
- private double qualToErrorProb(final double qual) {
- return Math.pow(10.0, qual / -10.0);
- }
-
- public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
- final Random random = new Random();
- final int nObservations = random.nextInt(maxObservations);
- final int nErrors = random.nextInt(maxErrors);
- final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
- return new RecalDatum(nObservations, nErrors, (byte)qual);
- }
-
- /**
- * We don't compare the estimated quality reported because it may be different when read from
- * report tables.
- *
- * @param o the other recal datum
- * @return true if the two recal datums have the same number of observations, errors and empirical quality.
- */
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof RecalDatum))
- return false;
- RecalDatum other = (RecalDatum) o;
- return super.equals(o) &&
- MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index 2a94426a7..f04e4a1b3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File;
import java.util.Collections;
@@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
* reads which have had the reference inserted because of color space inconsistencies.
*/
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
- public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
+ public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
/**
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy flag which governs how the recalibrator handles
@@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
* their color space tag can not be recalibrated.
*/
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
- public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
+ public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
/**
* The context covariate will use a context of this size to calculate it's covariate value for base mismatches
@@ -177,41 +178,41 @@ public class RecalibrationArgumentCollection {
public GATKReportTable generateReportTable() {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
argumentsTable.addColumn("Argument");
- argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
+ argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addRowID("covariate", true);
- argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
+ argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
argumentsTable.addRowID("no_standard_covs", true);
- argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
+ argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.addRowID("run_without_dbsnp", true);
- argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
+ argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.addRowID("solid_recal_mode", true);
- argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
+ argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.addRowID("solid_nocall_strategy", true);
- argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
+ argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.addRowID("mismatches_context_size", true);
- argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
+ argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("indels_context_size", true);
- argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
+ argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true);
- argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
+ argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true);
- argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
+ argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.addRowID("low_quality_tail", true);
- argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
+ argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.addRowID("default_platform", true);
- argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
+ argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.addRowID("force_platform", true);
- argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
+ argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
- argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
+ argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
- argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
+ argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
- argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
+ argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
- argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
+ argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
- argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
+ argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
index aa62a18bc..38e306939 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
index a24506d07..08c7da754 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
@@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.EventType;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
index 92549b821..8fbd37e30 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
@@ -47,7 +47,10 @@ import java.util.List;
*
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
- * Note that if there are multiple variants at a site, it takes the first one seen.
+ * Several important notes:
+ * 1) if there are multiple variants that start at a site, it chooses one of them randomly.
+ * 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
+ * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
*
*
Input
@@ -102,7 +105,7 @@ public class FastaAlternateReference extends FastaReference {
String refBase = String.valueOf((char)ref.getBase());
// Check to see if we have a called snp
- for ( VariantContext vc : tracker.getValues(variants) ) {
+ for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
if ( vc.isFiltered() )
continue;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
index 432bbd6d7..08a333486 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
@@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model {
/** The default model with the best performance in all cases */
- EXACT,
- POOL
+ EXACT
}
protected int N;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index 4253ff3ad..6fdc926d5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model {
SNP,
INDEL,
- BOTH,
- POOLSNP,
- POOLINDEL,
- POOLBOTH
+ GeneralPloidySNP,
+ GeneralPloidyINDEL,
+ BOTH
}
public enum GENOTYPING_MODE {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index cd1815d82..c1c1339f5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -82,7 +82,7 @@ import java.util.*;
* -o snps.raw.vcf \
* -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \
- * -dcov [50] \
+ * -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
* [-L targets.interval_list]
*
*
@@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif
} else {
// in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models
- if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
+/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
@@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
}
+ */
// get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index f73ab2471..f4bd196ae 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -50,6 +50,7 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
+ private static final String GPSTRING = "GeneralPloidy";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
@@ -273,7 +274,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
- return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
+ return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) {
@@ -640,6 +641,9 @@ public class UnifiedGenotyperEngine {
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
+ if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
+ modelPrefix = GPSTRING + modelPrefix;
+
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
@@ -648,17 +652,13 @@ public class UnifiedGenotyperEngine {
if ( vcInput.isSNP() ) {
// ignore SNPs if the user chose INDEL mode only
- if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
+ if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
- else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
- models.add(UAC.GLmodel);
- }
+ }
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
// ignore INDELs if the user chose SNP mode only
- if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
+ if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
- else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
- models.add(UAC.GLmodel);
}
// No support for other types yet
}
@@ -668,7 +668,7 @@ public class UnifiedGenotyperEngine {
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
}
else {
- models.add(UAC.GLmodel);
+ models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
}
}
@@ -730,12 +730,19 @@ public class UnifiedGenotyperEngine {
}
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
+
List> afClasses = new PluginManager(AlleleFrequencyCalculationModel.class).getPlugins();
+ // user-specified name
+ String afModelName = UAC.AFmodel.name();
+
+ if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
+ afModelName = GPSTRING + afModelName;
+
for (int i = 0; i < afClasses.size(); i++) {
Class extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
- if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
+ if (afModelName.equalsIgnoreCase(key)) {
try {
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
index 6047a15b4..b08def44f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
@@ -73,19 +73,7 @@ public class LeftAlignIndels extends ReadWalker {
@Output(required=false, doc="Output bam")
protected StingSAMFileWriter writer = null;
- /**
- * If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool
- * may run out of memory. We recommend that you additionally tell Java to use a temp directory with plenty of available
- * space (by setting java.io.tempdir on the command-line).
- */
- @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false)
- protected int MAX_RECORDS_IN_RAM = 500000;
-
- public void initialize() {
- // set up the output writer
- if ( writer != null )
- writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM);
- }
+ public void initialize() {}
private void emit(final SAMRecord read) {
if ( writer != null )
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
index f16deb701..f49e8f8c0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
@@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker samplesToPhase) {
// for ( String sample : samplesToPhase )
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
- VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
+ VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
// logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
index e54dc6388..3e48520a7 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
@@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
return true;
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
// first subset to the samples
- VariantContext subContext = vc.subContextFromSamples(samples, true);
+ VariantContext subContext = vc.subContextFromSamples(samples);
// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
index 0f55524a6..de832b108 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
@@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
if ( samples == null || samples.isEmpty() )
return true;
- VariantContext subContext = vc.subContextFromSamples(samples, false);
+ VariantContext subContext = vc.subContextFromSamples(samples);
if ( subContext.isPolymorphicInSamples() ) {
return true;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
index 0b395bc62..58cd14737 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
@@ -500,7 +500,10 @@ public class VariantEval extends RodWalker implements TreeRedu
@Requires({"eval != null", "comp != null"})
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
- // find all of the matching comps
+ if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
+ // if either of these are NO_VARIATION they are LENIENT matches
+ return EvalCompMatchType.LENIENT;
+
if ( comp.getType() != eval.getType() )
return EvalCompMatchType.NO_MATCH;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
index 6c4fcd26d..fe2437976 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
@@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
}
}
- public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
- if (vc1 != null) updateTiTv(vc1, false);
- if (vc2 != null) updateTiTv(vc2, true);
+ @Override
+ public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+ if (eval != null)
+ updateTiTv(eval, false);
+ if (comp != null)
+ updateTiTv(comp, true);
}
@Override
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
index 693bdf198..2ad08d806 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
@@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
final Collection knownComps = tracker.getValues(knowns, ref.getLocus());
for ( final VariantContext c : knownComps ) {
// loop over sites, looking for something that matches the type eval
- if ( eval.getType() == c.getType() ) {
+ if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
return KNOWN_STATES;
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
index 3dcc1f85f..e84b0b10e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
@@ -197,7 +197,9 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set sampleNames) {
- return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
+ // if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
+ final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
+ return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
}
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
@@ -262,12 +264,8 @@ public class VariantEvalUtils {
// First, filter the VariantContext to represent only the samples for evaluation
VariantContext vcsub = vc;
- if (subsetBySample && vc.hasGenotypes()) {
- if ( variantEvalWalker.isSubsettingToSpecificSamples() )
- vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
- else
- vcsub = ensureAnnotations(vc, vc);
- }
+ if (subsetBySample && vc.hasGenotypes())
+ vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
if ((byFilter || !vcsub.isFiltered())) {
addMapping(mapping, VariantEval.getAllSampleName(), vcsub);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
index 744a094c6..011f3471c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@@ -58,14 +58,11 @@ import java.util.*;
* to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
* slightly lower quality level.
*
- *
- * The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
+ * The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
*
* The tranches file that was generated by the VariantRecalibrator walker.
*
@@ -82,6 +79,7 @@ import java.util.*;
* --ts_filter_level 99.0 \
* -tranchesFile path/to/output.tranches \
* -recalFile path/to/output.recal \
+ * -mode SNP \
* -o path/to/output.recalibrated.filtered.vcf
*
*
@@ -198,7 +196,7 @@ public class ApplyRecalibration extends RodWalker implements T
for( final VariantContext vc : VCs ) {
- if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
+ if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
if( recalDatum == null ) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index d6df4ff1b..45fdad4f8 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -273,11 +274,37 @@ public class VariantDataManager {
}
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
- return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
- ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
+ return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
}
+ protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
+ switch( trainVC.getType() ) {
+ case SNP:
+ case MNP:
+ return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP );
+ case INDEL:
+ case MIXED:
+ case SYMBOLIC:
+ return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
+ default:
+ return false;
+ }
+ }
+
+ protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) {
+ switch( mode ) {
+ case SNP:
+ return evalVC.isSNP() || evalVC.isMNP();
+ case INDEL:
+ return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
+ case BOTH:
+ return true;
+ default:
+ throw new ReviewedStingException( "Encountered unknown recal mode: " + mode );
+ }
+ }
+
public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) {
// we need to sort in coordinate order in order to produce a valid VCF
Collections.sort( data, new Comparator() {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index 2a424e15b..ab2ff6176 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.FileNotFoundException;
@@ -65,7 +63,7 @@ import java.util.*;
* The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set.
* One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call.
* The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship
- * between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic
+ * between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the the probability that a SNP is a true genetic
* variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided
* as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive
* error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the
@@ -73,15 +71,9 @@ import java.util.*;
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
*
*
- * NOTE: Please see our best practices wiki page for our recommendations on which annotations to use for specific project designs.
- *
- *
* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* See http://www.r-project.org for more info on how to download and install R.
*
- *
* The input raw variants to be recalibrated.
@@ -90,7 +82,7 @@ import java.util.*;
*
*
Output
*
- * A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
+ * A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
*
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
*
@@ -102,8 +94,9 @@ import java.util.*;
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
- * -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
- * -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
+ * -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
+ * -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
+ * -mode SNP \
* -recalFile path/to/output.recal \
* -tranchesFile path/to/output.tranches \
* -rscriptFile path/to/output.plots.R
@@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker 127; -128 -> 128; -1 -> 255; etc.
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
index f24bbb636..9a2cb68db 100644
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@@ -168,6 +168,28 @@ public class PluginManager {
String pluginName = getName(pluginClass);
pluginsByName.put(pluginName, pluginClass);
}
+
+ // sort the plugins so the order of elements is deterministic
+ sortPlugins(plugins);
+ sortPlugins(interfaces);
+ }
+
+ /**
+ * Sorts, in place, the list of plugins according to getName() on each element
+ *
+ * @param unsortedPlugins
+ */
+ private final void sortPlugins(final List> unsortedPlugins) {
+ Collections.sort(unsortedPlugins, new ComparePluginsByName());
+ }
+
+ private final class ComparePluginsByName implements Comparator> {
+ @Override
+ public int compare(final Class extends PluginType> aClass, final Class extends PluginType> aClass1) {
+ String pluginName1 = getName(aClass);
+ String pluginName2 = getName(aClass1);
+ return pluginName1.compareTo(pluginName2);
+ }
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
index a4383c3ae..554188bc1 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
@@ -4,7 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
index ba9267222..6392ce4ce 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
@@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
index 0f9cc34e7..0776b3aa8 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
@@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec {
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
private final static boolean FORBID_SYMBOLICS = false;
+ private final static int ALLOWED_MAJOR_VERSION = 2;
+ private final static int MIN_MINOR_VERSION = 1;
+
+ private BCFVersion bcfVersion = null;
+
private VCFHeader header = null;
/**
@@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec {
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
try {
// note that this reads the magic as well, and so does double duty
- if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
- error("Input stream does not begin with BCF2 magic");
+ bcfVersion = BCFVersion.readBCFVersion(inputStream);
+ if ( bcfVersion == null )
+ error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
+
+ if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
+ error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
+ if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
+ error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
+
+ logger.info("BCF version " + bcfVersion);
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
@@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec {
FileInputStream fis = null;
try {
fis = new FileInputStream(path);
- return BCF2Utils.startsWithBCF2Magic(fis);
+ final BCFVersion version = BCFVersion.readBCFVersion(fis);
+ return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
} catch ( FileNotFoundException e ) {
return false;
} catch ( IOException e ) {
@@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec {
try {
if ( fis != null ) fis.close();
} catch ( IOException e ) {
- ; // do nothing
+ // do nothing
}
}
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
index a13be21c5..2619a4dae 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
@@ -202,7 +202,7 @@ public final class BCF2Decoder {
return null;
else {
final String s = new String(bytes, 0, goodLength);
- return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
+ return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
}
} catch ( IOException e ) {
throw new ReviewedStingException("readByte failure", e);
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
index 43e933948..c79abe2ae 100644
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
@@ -41,8 +41,6 @@ import java.util.*;
* @since 5/12
*/
public final class BCF2Utils {
- public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
-
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
public static final int OVERFLOW_ELEMENT_MARKER = 15;
@@ -75,68 +73,60 @@ public final class BCF2Utils {
*/
@Requires("header != null")
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
- public final static ArrayList makeDictionary(final VCFHeader header) {
+ public static ArrayList makeDictionary(final VCFHeader header) {
final Set seen = new HashSet();
final ArrayList dict = new ArrayList();
- boolean sawPASS = false;
+ // special case the special PASS field which doesn't show up in the FILTER field definitions
+ seen.add(VCFConstants.PASSES_FILTERS_v4);
+ dict.add(VCFConstants.PASSES_FILTERS_v4);
+
// set up the strings dictionary
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
if ( ! seen.contains(idLine.getID())) {
- sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
dict.add(idLine.getID());
seen.add(idLine.getID());
}
}
}
-
- if ( ! sawPASS )
- dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
-
return dict;
}
@Requires({"nElements >= 0", "type != null"})
- public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
+ public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
return typeByte;
}
@Ensures("result >= 0")
- public final static int decodeSize(final byte typeDescriptor) {
+ public static int decodeSize(final byte typeDescriptor) {
return (0xF0 & typeDescriptor) >> 4;
}
@Ensures("result >= 0")
- public final static int decodeTypeID(final byte typeDescriptor) {
+ public static int decodeTypeID(final byte typeDescriptor) {
return typeDescriptor & 0x0F;
}
@Ensures("result != null")
- public final static BCF2Type decodeType(final byte typeDescriptor) {
+ public static BCF2Type decodeType(final byte typeDescriptor) {
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
}
- public final static boolean sizeIsOverflow(final byte typeDescriptor) {
+ public static boolean sizeIsOverflow(final byte typeDescriptor) {
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
}
@Requires("nElements >= 0")
- public final static boolean willOverflow(final long nElements) {
+ public static boolean willOverflow(final long nElements) {
return nElements > MAX_INLINE_ELEMENTS;
}
- public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
- final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
- stream.read(magicBytes);
- return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
- }
-
- public final static byte readByte(final InputStream stream) {
+ public static byte readByte(final InputStream stream) {
// TODO -- shouldn't be capturing error here
try {
return (byte)(stream.read() & 0xFF);
@@ -155,7 +145,7 @@ public final class BCF2Utils {
*/
@Requires({"strings != null", "strings.size() > 1"})
@Ensures("result != null")
- public static final String collapseStringList(final List strings) {
+ public static String collapseStringList(final List strings) {
final StringBuilder b = new StringBuilder();
for ( final String s : strings ) {
if ( s != null ) {
@@ -177,14 +167,14 @@ public final class BCF2Utils {
*/
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
@Ensures("result != null")
- public static final List exploreStringList(final String collapsed) {
+ public static List explodeStringList(final String collapsed) {
assert isCollapsedString(collapsed);
final String[] exploded = collapsed.substring(1).split(",");
return Arrays.asList(exploded);
}
@Requires("s != null")
- public static final boolean isCollapsedString(final String s) {
+ public static boolean isCollapsedString(final String s) {
return s.charAt(0) == ',';
}
@@ -226,7 +216,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
- public final static BCF2Type determineIntegerType(final int value) {
+ public static BCF2Type determineIntegerType(final int value) {
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
if ( potentialType.withinRange(value) )
return potentialType;
@@ -236,7 +226,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
- public final static BCF2Type determineIntegerType(final int[] values) {
+ public static BCF2Type determineIntegerType(final int[] values) {
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
BCF2Type maxType = BCF2Type.INT8;
for ( final int value : values ) {
@@ -262,7 +252,7 @@ public final class BCF2Utils {
*/
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
@Ensures("result.isIntegerType()")
- public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
+ public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
switch ( t1 ) {
case INT8: return t2;
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
@@ -272,7 +262,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
- public final static BCF2Type determineIntegerType(final List values) {
+ public static BCF2Type determineIntegerType(final List values) {
BCF2Type maxType = BCF2Type.INT8;
for ( final int value : values ) {
final BCF2Type type1 = determineIntegerType(value);
@@ -297,7 +287,7 @@ public final class BCF2Utils {
* @param o
* @return
*/
- public final static List