Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
c3b6e2b143
2
ivy.xml
2
ivy.xml
|
|
@ -52,7 +52,7 @@
|
|||
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
|
||||
|
||||
<!-- Dependencies for the graph aligner -->
|
||||
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
|
||||
<dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
|
||||
|
||||
<!-- Dependencies for the html walker documention -->
|
||||
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
|
||||
|
|
|
|||
|
|
@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
|
|
|
|||
|
|
@ -68,10 +68,10 @@ public class ErrorModel {
|
|||
break;
|
||||
}
|
||||
}
|
||||
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||
if (refSampleVC.isIndel()) {
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
|
||||
}
|
||||
|
|
@ -96,7 +96,8 @@ public class ErrorModel {
|
|||
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
|
||||
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
|
||||
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
|
||||
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
||||
if (!haplotypeMap.isEmpty())
|
||||
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
||||
}
|
||||
int idx = 0;
|
||||
for (PileupElement refPileupElement : refSamplePileup) {
|
||||
|
|
@ -108,7 +109,7 @@ public class ErrorModel {
|
|||
if (DEBUG) System.out.println(m);
|
||||
isMatch |= m;
|
||||
}
|
||||
if (refSampleVC.isIndel()) {
|
||||
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
|
||||
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
|
||||
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
|
||||
if (!isInformativeElement(perAlleleLikelihoods))
|
||||
|
|
@ -173,10 +174,10 @@ public class ErrorModel {
|
|||
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
|
||||
if (allele.isReference()) {
|
||||
// for a ref allele, any base mismatch or new indel is a mismatch.
|
||||
if(allele.getBases().length>0 )
|
||||
if(allele.getBases().length>0)
|
||||
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
|
||||
// could clean up/simplify this when unpadding is removed
|
||||
return (pileupElement.getBase() == refBase);
|
||||
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
|
||||
else
|
||||
// either null allele to compare, or ref/alt lengths are different (indel by definition).
|
||||
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
|
||||
final protected UnifiedArgumentCollection UAC;
|
||||
|
||||
|
|
@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
private final static boolean VERBOSE = false;
|
||||
|
||||
protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
ploidy = UAC.samplePloidy;
|
||||
this.UAC = UAC;
|
||||
|
|
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
for ( final double[] likelihoods : GLs ) {
|
||||
|
||||
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
|
||||
final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
|
||||
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
|
||||
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
|
||||
for (int k=1; k < acCount.length;k++) {
|
||||
if (acCount[k] > 0)
|
||||
|
|
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
return newPool;
|
||||
}
|
||||
|
||||
// todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
|
||||
// todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
|
||||
/**
|
||||
*
|
||||
* @param set ExactACset holding conformation to be computed
|
||||
|
|
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
continue;
|
||||
|
||||
|
||||
PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
|
||||
GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
|
||||
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
|
||||
PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
|
||||
final double[] x = originalPool.getLikelihoodsAsVector(true);
|
||||
while(firstIterator.hasNext()) {
|
||||
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
|
||||
firstIterator.next();
|
||||
}
|
||||
|
||||
PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
final double[] y = yy.clone();
|
||||
while(secondIterator.hasNext()) {
|
||||
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
|
||||
|
|
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// initialize output to -log10(choose(m1+m2,[k1 k2...])
|
||||
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
|
||||
final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
|
||||
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
|
||||
|
||||
|
||||
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
|
||||
|
|
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
|
||||
|
||||
// for current conformation, get all possible ways to break vector K into two components G1 and G2
|
||||
final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
|
||||
while (innerIterator.hasNext()) {
|
||||
// check if breaking current conformation into g1 and g2 is feasible.
|
||||
|
|
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
if ( numOriginalAltAlleles == numNewAltAlleles) {
|
||||
newLikelihoods = originalLikelihoods;
|
||||
} else {
|
||||
newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
|
||||
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
|
||||
|
||||
// might need to re-normalize
|
||||
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
||||
|
|
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
// find the genotype with maximum likelihoods
|
||||
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
|
||||
final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
|
||||
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
|
||||
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
|
||||
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public abstract class PoolGenotypeLikelihoods {
|
||||
public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||
protected final int numChromosomes;
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
|
|
@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
|
||||
private static final boolean FAST_GL_COMPUTATION = true;
|
||||
// constructor with given logPL elements
|
||||
public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
|
||||
public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
|
||||
this.alleles = alleles;
|
||||
this.nAlleles = alleles.size();
|
||||
numChromosomes = ploidy;
|
||||
|
|
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
|
||||
} else {
|
||||
if (logLikelihoods.length != likelihoodDim)
|
||||
throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
|
||||
throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
|
||||
|
||||
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
|
||||
}
|
||||
|
|
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
final int numAlleles = currentState.length;
|
||||
final int ploidy = restrictSumTo;
|
||||
|
||||
linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
|
||||
linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
|
||||
}
|
||||
else
|
||||
throw new ReviewedStingException("BUG: Not supported");
|
||||
|
|
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
|
||||
final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
|
||||
|
||||
int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
|
||||
int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
|
||||
double[] newPLs = new double[newPLSize];
|
||||
|
||||
|
||||
|
|
@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
newCount[idx] = pVec[permutationKey[idx]];
|
||||
|
||||
// get corresponding index from new count
|
||||
int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
|
||||
int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
|
||||
newPLs[outputIdx] = pl;
|
||||
if (VERBOSE) {
|
||||
System.out.println("Old Key:"+Arrays.toString(pVec));
|
||||
|
|
@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
|
||||
//protected Set<String> laneIDs;
|
||||
public enum Model {
|
||||
|
|
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
final protected UnifiedArgumentCollection UAC;
|
||||
|
||||
protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC,logger);
|
||||
this.UAC = UAC;
|
||||
|
||||
|
|
@ -137,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
protected static class PoolGenotypeData {
|
||||
|
||||
public final String name;
|
||||
public final PoolGenotypeLikelihoods GL;
|
||||
public final GeneralPloidyGenotypeLikelihoods GL;
|
||||
public final int depth;
|
||||
public final List<Allele> alleles;
|
||||
|
||||
public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
|
||||
public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
|
||||
this.name = name;
|
||||
this.GL = GL;
|
||||
this.depth = depth;
|
||||
|
|
@ -236,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
|
||||
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
|
||||
// actually compute likelihoods
|
||||
final int nGoodBases = GL.add(pileup, UAC);
|
||||
if ( nGoodBases > 0 )
|
||||
|
|
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
for ( PoolGenotypeData sampleData : GLs ) {
|
||||
// extract from multidimensional array
|
||||
final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
|
||||
final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
|
||||
allAlleles, alleles);
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
|
|
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
Abstract methods - must be implemented in derived classes
|
||||
*/
|
||||
|
||||
protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
|
|
@ -18,7 +18,7 @@ import java.util.*;
|
|||
* Time: 10:06 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
||||
public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
|
||||
final PairHMMIndelErrorModel pairModel;
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap;
|
||||
final ReferenceContext refContext;
|
||||
|
|
@ -27,14 +27,14 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
|||
|
||||
final byte refBase;
|
||||
|
||||
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean ignoreLaneInformation,
|
||||
final PairHMMIndelErrorModel pairModel,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext referenceContext) {
|
||||
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean ignoreLaneInformation,
|
||||
final PairHMMIndelErrorModel pairModel,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext referenceContext) {
|
||||
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
||||
this.pairModel = pairModel;
|
||||
this.haplotypeMap = haplotypeMap;
|
||||
|
|
@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
|
||||
public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
|
||||
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
|
||||
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
|
|
@ -59,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
}
|
||||
*/
|
||||
|
||||
protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
|
||||
protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
|
||||
super(UAC, logger);
|
||||
|
||||
|
||||
|
|
@ -69,14 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
}
|
||||
|
||||
|
||||
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation){
|
||||
return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
|
||||
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
|
||||
}
|
||||
|
||||
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||
|
|
@ -96,6 +94,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
haplotypeMap.clear();
|
||||
}
|
||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
|
||||
|
||||
// sanity check: if haplotype map couldn't be created, clear allele list
|
||||
if (haplotypeMap.isEmpty())
|
||||
alleles.clear();
|
||||
return alleles;
|
||||
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -22,7 +23,7 @@ import static java.lang.Math.pow;
|
|||
* and posteriors given a pile of bases and quality scores
|
||||
*
|
||||
*/
|
||||
public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
|
||||
public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
|
||||
|
||||
final List<Allele> myAlleles;
|
||||
final int[] alleleIndices;
|
||||
|
|
@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
|
|||
* @param useBQAedPileup Use BAQed pileup
|
||||
* @param ignoreLaneInformation If true, lane info is ignored
|
||||
*/
|
||||
public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
|
||||
public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
|
||||
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
||||
this.useBAQedPileup = useBQAedPileup;
|
||||
|
||||
myAlleles = new ArrayList<Allele>(alleles);
|
||||
|
||||
refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref!
|
||||
Allele refAllele = alleles.get(0);
|
||||
//sanity check: by construction, first allele should ALWAYS be the reference alleles
|
||||
if (!refAllele.isReference())
|
||||
throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
|
||||
|
||||
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
|
||||
|
||||
if (myAlleles.size() < BaseUtils.BASES.length) {
|
||||
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,
|
||||
|
|
@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
|
||||
public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
|
||||
|
||||
|
||||
protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) {
|
||||
protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
|
||||
}
|
||||
|
||||
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation) {
|
||||
return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
|
||||
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
|
||||
}
|
||||
|
||||
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||
|
|
@ -241,6 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
samplesList.addAll( samples );
|
||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import java.util.Arrays;
|
|||
* Time: 7:44 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PoolAFCalculationModelUnitTest extends BaseTest {
|
||||
public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
|
||||
|
||||
static double[] AA1, AB1, BB1;
|
||||
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
|
||||
|
|
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
|
|||
public void testGLs(GetGLsTest cfg) {
|
||||
|
||||
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
|
||||
final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
|
||||
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
||||
double[] priors = new double[len]; // flat priors
|
||||
|
||||
PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
|
||||
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
|
||||
int nameIndex = 1;
|
||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||
|
|
@ -41,7 +41,7 @@ import java.io.PrintStream;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class PoolGenotypeLikelihoodsUnitTest {
|
||||
public class GeneralPloidyGenotypeLikelihoodsUnitTest {
|
||||
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
final Logger logger = Logger.getLogger(Walker.class);
|
||||
|
|
@ -60,7 +60,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
public void testStoringLikelihoodElements() {
|
||||
|
||||
|
||||
// basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
|
||||
// basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
|
||||
|
||||
int ploidy = 20;
|
||||
int numAlleles = 4;
|
||||
|
|
@ -78,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
for (int k=0; k < gls.length; k++)
|
||||
gls[k]= (double)k;
|
||||
|
||||
PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
|
||||
GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
|
||||
double[] glnew = gl.getLikelihoods();
|
||||
|
||||
Assert.assertEquals(gls, glnew);
|
||||
|
|
@ -90,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
for (int ploidy = 2; ploidy < 10; ploidy++) {
|
||||
for (int nAlleles = 2; nAlleles < 10; nAlleles++)
|
||||
Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
|
||||
Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
|
||||
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
|
||||
}
|
||||
|
||||
|
|
@ -102,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// create iterator, compare linear index given by iterator with closed form function
|
||||
int numAlleles = 4;
|
||||
int ploidy = 2;
|
||||
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
System.out.format("\n%d:",iterator.getLinearIndex());
|
||||
|
|
@ -111,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
System.out.format("%d ",aa);
|
||||
|
||||
|
||||
int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
|
||||
int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
|
||||
System.out.format("Computed idx = %d\n",computedIdx);
|
||||
iterator.next();
|
||||
}
|
||||
|
|
@ -140,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
allelesToSubset.add(Allele.create("A",false));
|
||||
allelesToSubset.add(Allele.create("C",false));
|
||||
|
||||
double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
|
||||
double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
|
||||
originalAlleles, allelesToSubset);
|
||||
|
||||
|
||||
|
|
@ -170,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
@Test
|
||||
public void testIndexIterator() {
|
||||
int[] seed = new int[]{1,2,3,4};
|
||||
PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
|
||||
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
|
||||
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
|
||||
|
||||
|
|
@ -228,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
}
|
||||
|
||||
private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
|
||||
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
|
||||
private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
int[] a = iterator.getCurrentVector();
|
||||
int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
|
||||
int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
|
||||
if (VERBOSE) {
|
||||
System.out.format("%d:",iterator.getLinearIndex());
|
||||
for (int i=0; i < seed.length; i++)
|
||||
|
|
@ -289,13 +289,11 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
}
|
||||
|
||||
// TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
|
||||
// TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
|
||||
@Test (enabled = false)
|
||||
@Test
|
||||
public void testIndelErrorModel() {
|
||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||
final byte refByte = refPileupTestProvider.getRefByte();
|
||||
final String altBases = (char)refByte + "TCA";
|
||||
final String altBases = "TCA";
|
||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||
final List<Allele> trueAlleles = new ArrayList<Allele>();
|
||||
trueAlleles.add(Allele.create(refByte, true));
|
||||
|
|
@ -316,7 +314,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// get artificial alignment context for ref sample - no noise
|
||||
// CASE 1: Test HET insertion
|
||||
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
|
||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
|
||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||
|
|
@ -393,6 +391,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
final byte refByte = readPileupTestProvider.getRefByte();
|
||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||
|
||||
|
||||
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
|
||||
final Set<String> laneIDs = new TreeSet<String>();
|
||||
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
|
||||
|
|
@ -409,23 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
for (String laneID : laneIDs)
|
||||
noisyErrorModels.put(laneID, Q30ErrorModel);
|
||||
|
||||
final int refIdx = 0;
|
||||
int altIdx = 2;
|
||||
|
||||
// ref allele must be first
|
||||
allAlleles.add(Allele.create(refByte, true));
|
||||
// all first ref allele
|
||||
allAlleles.add(Allele.create(refByte,true));
|
||||
for (byte b: BaseUtils.BASES) {
|
||||
if (refByte != b) {
|
||||
if (b == altByte)
|
||||
altIdx = allAlleles.size();
|
||||
if (refByte != b)
|
||||
allAlleles.add(Allele.create(b, false));
|
||||
}
|
||||
}
|
||||
|
||||
final int refIdx = 0;
|
||||
int altIdx = -1;
|
||||
|
||||
for (int k=0; k < allAlleles.size(); k++)
|
||||
if (altByte == allAlleles.get(k).getBases()[0]) {
|
||||
altIdx = k;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
|
||||
PrintStream out = null;
|
||||
if (SIMULATE_NOISY_PILEUP) {
|
||||
try {
|
||||
out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
|
||||
out = new PrintStream(new File("GLUnitTest.table"));
|
||||
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
|
||||
}
|
||||
catch (Exception e) {}
|
||||
|
|
@ -449,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
// get now likelihoods for this
|
||||
|
||||
final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
|
||||
final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
|
||||
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
if (VERBOSE) {
|
||||
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
|
||||
|
|
@ -478,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
// get now likelihoods for this
|
||||
|
||||
final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
|
||||
final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
|
||||
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
mlPair = noisyGL.getMostLikelyACCount();
|
||||
|
||||
|
|
@ -12,33 +12,33 @@ import org.testng.annotations.Test;
|
|||
* Time: 11:28 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PoolCallerIntegrationTest extends WalkerTest {
|
||||
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
||||
final static String REF = b37KGReference;
|
||||
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
|
||||
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
|
||||
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
|
||||
final String REFSAMPLE_NAME = "NA12878";
|
||||
final String MTINTERVALS = "MT";
|
||||
final String MTINTERVALS = "MT:1-3000";
|
||||
final String LSVINTERVALS = "20:40,000,000-41,000,000";
|
||||
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
||||
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
||||
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
||||
private void PC_MT_Test(String bam, String args, String name, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
|
||||
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
|
||||
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
|
||||
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
|
|
@ -46,33 +46,33 @@ public class PoolCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
|
||||
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
|
||||
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706");
|
||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29");
|
||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
library("ggplot2")
|
||||
library("tools") #For compactPDF in R 2.13+
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
data <- read.csv(args[1])
|
||||
|
|
@ -130,12 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
|
||||
// can't close tribble index when writing
|
||||
if ( message.indexOf("Unable to close index for") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||
exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
|
||||
|
||||
// disk is full
|
||||
if ( message.indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getMessage()));
|
||||
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
|
||||
if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -274,6 +274,38 @@ public class GenomeAnalysisEngine {
|
|||
//return result;
|
||||
}
|
||||
|
||||
// TODO -- Let's move this to a utility class in unstable - but which one?
|
||||
// **************************************************************************************
|
||||
// * Handle Deprecated Walkers *
|
||||
// **************************************************************************************
|
||||
|
||||
// Mapping from walker name to major version number where the walker first disappeared
|
||||
private static Map<String, String> deprecatedGATKWalkers = new HashMap<String, String>();
|
||||
static {
|
||||
deprecatedGATKWalkers.put("CountCovariates", "2.0");
|
||||
deprecatedGATKWalkers.put("TableRecalibration", "2.0");
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to check whether a given walker has been deprecated in a previous GATK release
|
||||
*
|
||||
* @param walkerName the walker class name (not the full package) to check
|
||||
*/
|
||||
public static boolean isDeprecatedWalker(final String walkerName) {
|
||||
return deprecatedGATKWalkers.containsKey(walkerName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to check whether a given walker has been deprecated in a previous GATK release
|
||||
*
|
||||
* @param walkerName the walker class name (not the full package) to check
|
||||
*/
|
||||
public static String getDeprecatedMajorVersionNumber(final String walkerName) {
|
||||
return deprecatedGATKWalkers.get(walkerName);
|
||||
}
|
||||
|
||||
// **************************************************************************************
|
||||
|
||||
/**
|
||||
* Retrieves an instance of the walker based on the walker name.
|
||||
*
|
||||
|
|
@ -287,6 +319,9 @@ public class GenomeAnalysisEngine {
|
|||
if ( isGATKLite() && GATKLiteUtils.isAvailableOnlyInFullGATK(walkerName) ) {
|
||||
e = new UserException.NotSupportedInGATKLite("the " + walkerName + " walker is available only in the full version of the GATK");
|
||||
}
|
||||
else if ( isDeprecatedWalker(walkerName) ) {
|
||||
e = new UserException.DeprecatedWalker(walkerName, getDeprecatedMajorVersionNumber(walkerName));
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
|
@ -762,6 +797,14 @@ public class GenomeAnalysisEngine {
|
|||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
|
||||
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
|
||||
|
||||
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
|
||||
|
||||
if (argCollection.keepProgramRecords)
|
||||
removeProgramRecords = false;
|
||||
|
||||
return new SAMDataSource(
|
||||
samReaderIDs,
|
||||
threadAllocation,
|
||||
|
|
@ -778,7 +821,8 @@ public class GenomeAnalysisEngine {
|
|||
getWalkerBAQQualityMode(),
|
||||
refReader,
|
||||
getBaseRecalibration(),
|
||||
argCollection.defaultBaseQualities);
|
||||
argCollection.defaultBaseQualities,
|
||||
removeProgramRecords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -249,6 +249,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
||||
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
|
||||
public boolean removeProgramRecords = false;
|
||||
|
||||
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
|
||||
public boolean keepProgramRecords = false;
|
||||
|
||||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
|
||||
public ValidationExclusion.TYPE unsafe;
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,11 @@ public class SAMDataSource {
|
|||
*/
|
||||
private final SAMFileReader.ValidationStringency validationStringency;
|
||||
|
||||
/**
|
||||
* Do we want to remove the program records from this data source?
|
||||
*/
|
||||
private final boolean removeProgramRecords;
|
||||
|
||||
/**
|
||||
* Store BAM indices for each reader present.
|
||||
*/
|
||||
|
|
@ -200,7 +205,8 @@ public class SAMDataSource {
|
|||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1);
|
||||
(byte) -1,
|
||||
false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -233,7 +239,8 @@ public class SAMDataSource {
|
|||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities) {
|
||||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords) {
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
|
||||
|
|
@ -249,6 +256,7 @@ public class SAMDataSource {
|
|||
dispatcher = null;
|
||||
|
||||
validationStringency = strictness;
|
||||
this.removeProgramRecords = removeProgramRecords;
|
||||
if(readBufferSize != null)
|
||||
ReadShard.setReadBufferSize(readBufferSize);
|
||||
else {
|
||||
|
|
@ -748,7 +756,7 @@ public class SAMDataSource {
|
|||
private synchronized void createNewResource() {
|
||||
if(allResources.size() > maxEntries)
|
||||
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
|
||||
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
|
||||
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
|
||||
allResources.add(readers);
|
||||
availableResources.add(readers);
|
||||
}
|
||||
|
|
@ -777,9 +785,11 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Derive a new set of readers from the Reads metadata.
|
||||
* @param readerIDs reads to load.
|
||||
* TODO: validationStringency is not used here
|
||||
* @param validationStringency validation stringency.
|
||||
* @param removeProgramRecords indicate whether to clear program records from the readers
|
||||
*/
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
|
||||
final int totalNumberOfFiles = readerIDs.size();
|
||||
int readerNumber = 1;
|
||||
final SimpleTimer timer = new SimpleTimer().start();
|
||||
|
|
@ -790,6 +800,9 @@ public class SAMDataSource {
|
|||
long lastTick = timer.currentTime();
|
||||
for(final SAMReaderID readerID: readerIDs) {
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID).call();
|
||||
if (removeProgramRecords) {
|
||||
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
||||
}
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
if (stub.getGenerateMD5())
|
||||
factory.setCreateMd5File(true);
|
||||
// Adjust max records in RAM.
|
||||
// TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
|
||||
if(stub.getMaxRecordsInRam() != null)
|
||||
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());
|
||||
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public class GATKRunReport {
|
|||
private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
|
||||
|
||||
// number of milliseconds before the S3 put operation is timed-out:
|
||||
private static final long S3PutTimeOut = 30 * 1000;
|
||||
private static final long S3PutTimeOut = 10 * 1000;
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -89,9 +89,9 @@ public class GATKReport {
|
|||
reader = new BufferedReader(new FileReader(file));
|
||||
reportHeader = reader.readLine();
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Could not open file : " + file);
|
||||
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
|
||||
} catch (IOException e) {
|
||||
throw new ReviewedStingException("Could not read file : " + file);
|
||||
throw new UserException.CouldNotReadInputFile(file, e);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import java.util.List;
|
|||
@PartitionBy(PartitionType.READ)
|
||||
@ActiveRegionExtension(extension=50,maxRegion=1500)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
||||
@RemoveProgramRecords
|
||||
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
|
||||
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
@RemoveProgramRecords
|
||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 8/2/12
|
||||
* Time: 1:58 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* Indicates that program records should be removed from SAM headers by default for this walker
|
||||
*/
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface RemoveProgramRecords {
|
||||
}
|
||||
|
|
@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.commandline.Gatherer;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer {
|
|||
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
|
||||
final File recal_out = new File(output.getName() + ".original");
|
||||
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else if (!RAC.NO_PLOTS) {
|
||||
final File recal_out = new File(output.getName() + ".recal");
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
|
||||
generalReport.output(outputFile);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
|||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -58,7 +62,7 @@ import java.util.ArrayList;
|
|||
* This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating
|
||||
* only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative
|
||||
* of poor base quality. This walker generates tables based on various user-specified covariates (such as read group,
|
||||
* reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical
|
||||
* reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical
|
||||
* probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations.
|
||||
* The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score).
|
||||
* <p>
|
||||
|
|
@ -90,7 +94,7 @@ import java.util.ArrayList;
|
|||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx4g -jar GenomeAnalysisTK.jar \
|
||||
* -T BaseQualityScoreRecalibrator \
|
||||
* -T BaseRecalibrator \
|
||||
* -I my_reads.bam \
|
||||
* -R resources/Homo_sapiens_assembly18.fasta \
|
||||
* -knownSites bundle/hg18/dbsnp_132.hg18.vcf \
|
||||
|
|
@ -109,7 +113,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
|
|
@ -143,12 +147,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
|
||||
|
||||
if (RAC.LIST_ONLY) {
|
||||
RecalDataManager.listAvailableCovariates(logger);
|
||||
RecalUtils.listAvailableCovariates(logger);
|
||||
System.exit(0);
|
||||
}
|
||||
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
|
||||
|
|
@ -222,17 +226,17 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
|
||||
if (readNotSeen(read)) {
|
||||
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
|
||||
RecalDataManager.parsePlatformForRead(read, RAC);
|
||||
if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
|
||||
RecalUtils.parsePlatformForRead(read, RAC);
|
||||
if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
|
||||
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
|
||||
continue;
|
||||
}
|
||||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
|
||||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
|
||||
}
|
||||
|
||||
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||
RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||
RecalDataManager.isColorSpaceConsistent(read, offset))
|
||||
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||
RecalUtils.isColorSpaceConsistent(read, offset))
|
||||
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
}
|
||||
countedSites++;
|
||||
|
|
@ -271,13 +275,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
public void onTraversalDone(Long result) {
|
||||
logger.info("Calculating quantized quality scores...");
|
||||
quantizeQualityScores();
|
||||
|
||||
logger.info("Writing recalibration report...");
|
||||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
if (!RAC.NO_PLOTS) {
|
||||
logger.info("Generating recalibration plots...");
|
||||
generatePlots();
|
||||
}
|
||||
logger.info("Writing recalibration report...");
|
||||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
logger.info("Processed: " + result + " sites");
|
||||
}
|
||||
|
||||
|
|
@ -285,10 +292,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
|
||||
if (recalFile != null) {
|
||||
RecalibrationReport report = new RecalibrationReport(recalFile);
|
||||
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else
|
||||
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -309,7 +316,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
|
||||
}
|
||||
|
||||
RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,109 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Jan 6, 2010
|
||||
*
|
||||
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
|
||||
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class Datum {
|
||||
|
||||
long numObservations; // number of bases seen in total
|
||||
long numMismatches; // number of bases seen that didn't match the reference
|
||||
|
||||
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Datum() {
|
||||
numObservations = 0L;
|
||||
numMismatches = 0L;
|
||||
}
|
||||
|
||||
public Datum(long numObservations, long numMismatches) {
|
||||
this.numObservations = numObservations;
|
||||
this.numMismatches = numMismatches;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
synchronized void increment(final long incObservations, final long incMismatches) {
|
||||
numObservations += incObservations;
|
||||
numMismatches += incMismatches;
|
||||
}
|
||||
|
||||
synchronized void increment(final boolean isError) {
|
||||
numObservations++;
|
||||
numMismatches += isError ? 1:0;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// methods to derive empirical quality score
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
double empiricalQualDouble() {
|
||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
|
||||
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
byte empiricalQualByte() {
|
||||
final double doubleMismatches = (double) (numMismatches);
|
||||
final double doubleObservations = (double) (numObservations);
|
||||
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof Datum))
|
||||
return false;
|
||||
Datum other = (Datum) o;
|
||||
return numMismatches == other.numMismatches && numObservations == other.numObservations;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*
|
||||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class RecalDatum extends Datum {
|
||||
|
||||
private static final double UNINITIALIZED = -1.0;
|
||||
|
||||
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
|
||||
numObservations = _numObservations;
|
||||
numMismatches = _numMismatches;
|
||||
estimatedQReported = reportedQuality;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
public RecalDatum(final RecalDatum copy) {
|
||||
this.numObservations = copy.numObservations;
|
||||
this.numMismatches = copy.numMismatches;
|
||||
this.estimatedQReported = copy.estimatedQReported;
|
||||
this.empiricalQuality = copy.empiricalQuality;
|
||||
}
|
||||
|
||||
public void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
increment(other.numObservations, other.numMismatches);
|
||||
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void increment(final boolean isError) {
|
||||
super.increment(isError);
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires("empiricalQuality == UNINITIALIZED")
|
||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||
protected final void calcEmpiricalQuality() {
|
||||
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
|
||||
}
|
||||
|
||||
public void setEstimatedQReported(final double estimatedQReported) {
|
||||
this.estimatedQReported = estimatedQReported;
|
||||
}
|
||||
|
||||
public final double getEstimatedQReported() {
|
||||
return estimatedQReported;
|
||||
}
|
||||
|
||||
public void setEmpiricalQuality(final double empiricalQuality) {
|
||||
this.empiricalQuality = empiricalQuality;
|
||||
}
|
||||
|
||||
public final double getEmpiricalQuality() {
|
||||
if (empiricalQuality == UNINITIALIZED)
|
||||
calcEmpiricalQuality();
|
||||
return empiricalQuality;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
|
||||
}
|
||||
|
||||
public String stringForCSV() {
|
||||
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
|
||||
}
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
|
||||
private double qualToErrorProb(final double qual) {
|
||||
return Math.pow(10.0, qual / -10.0);
|
||||
}
|
||||
|
||||
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
||||
final Random random = new Random();
|
||||
final int nObservations = random.nextInt(maxObservations);
|
||||
final int nErrors = random.nextInt(maxErrors);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
|
||||
return new RecalDatum(nObservations, nErrors, (byte)qual);
|
||||
}
|
||||
|
||||
/**
|
||||
* We don't compare the estimated quality reported because it may be different when read from
|
||||
* report tables.
|
||||
*
|
||||
* @param o the other recal datum
|
||||
* @return true if the two recal datums have the same number of observations, errors and empirical quality.
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof RecalDatum))
|
||||
return false;
|
||||
RecalDatum other = (RecalDatum) o;
|
||||
return super.equals(o) &&
|
||||
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
|
|
@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
|
|||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
*/
|
||||
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
|
||||
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
|
||||
/**
|
||||
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
|
||||
|
|
@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
|
|||
* their color space tag can not be recalibrated.
|
||||
*/
|
||||
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
|
||||
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate it's covariate value for base mismatches
|
||||
|
|
@ -177,41 +178,41 @@ public class RecalibrationArgumentCollection {
|
|||
public GATKReportTable generateReportTable() {
|
||||
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
argumentsTable.addColumn("Argument");
|
||||
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
argumentsTable.addRowID("covariate", true);
|
||||
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
|
||||
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
|
||||
argumentsTable.addRowID("no_standard_covs", true);
|
||||
argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||
argumentsTable.addRowID("run_without_dbsnp", true);
|
||||
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||
argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||
argumentsTable.addRowID("solid_recal_mode", true);
|
||||
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
|
||||
argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
|
||||
argumentsTable.addRowID("solid_nocall_strategy", true);
|
||||
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||
argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||
argumentsTable.addRowID("mismatches_context_size", true);
|
||||
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("indels_context_size", true);
|
||||
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("mismatches_default_quality", true);
|
||||
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("insertions_default_quality", true);
|
||||
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("low_quality_tail", true);
|
||||
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||
argumentsTable.addRowID("default_platform", true);
|
||||
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
|
||||
argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
|
||||
argumentsTable.addRowID("force_platform", true);
|
||||
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||
argumentsTable.addRowID("quantizing_levels", true);
|
||||
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.addRowID("keep_intermediate_files", true);
|
||||
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
|
||||
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
|
||||
argumentsTable.addRowID("no_plots", true);
|
||||
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
||||
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,10 @@ import java.util.List;
|
|||
* <p>
|
||||
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
|
||||
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
|
||||
* Note that if there are multiple variants at a site, it takes the first one seen.
|
||||
* Several important notes:
|
||||
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
|
||||
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
|
||||
* 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
|
||||
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
|
|
@ -102,7 +105,7 @@ public class FastaAlternateReference extends FastaReference {
|
|||
String refBase = String.valueOf((char)ref.getBase());
|
||||
|
||||
// Check to see if we have a called snp
|
||||
for ( VariantContext vc : tracker.getValues(variants) ) {
|
||||
for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
|
||||
if ( vc.isFiltered() )
|
||||
continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
public enum Model {
|
||||
/** The default model with the best performance in all cases */
|
||||
EXACT,
|
||||
POOL
|
||||
EXACT
|
||||
}
|
||||
|
||||
protected int N;
|
||||
|
|
|
|||
|
|
@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
BOTH,
|
||||
POOLSNP,
|
||||
POOLINDEL,
|
||||
POOLBOTH
|
||||
GeneralPloidySNP,
|
||||
GeneralPloidyINDEL,
|
||||
BOTH
|
||||
}
|
||||
|
||||
public enum GENOTYPING_MODE {
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ import java.util.*;
|
|||
* -o snps.raw.vcf \
|
||||
* -stand_call_conf [50.0] \
|
||||
* -stand_emit_conf 10.0 \
|
||||
* -dcov [50] \
|
||||
* -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
|
||||
* [-L targets.interval_list]
|
||||
* </pre>
|
||||
*
|
||||
|
|
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
} else {
|
||||
// in full mode: check for consistency in ploidy/pool calling arguments
|
||||
// check for correct calculation models
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
// polyploidy requires POOL GL and AF calculation models to be specified right now
|
||||
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
|
||||
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
|
||||
|
|
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
|
||||
|
||||
}
|
||||
*/
|
||||
// get all of the unique sample names
|
||||
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
|
||||
samples.clear();
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ import java.util.*;
|
|||
|
||||
public class UnifiedGenotyperEngine {
|
||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
private static final String GPSTRING = "GeneralPloidy";
|
||||
|
||||
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
|
||||
|
||||
|
|
@ -273,7 +274,7 @@ public class UnifiedGenotyperEngine {
|
|||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||
}
|
||||
|
||||
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
}
|
||||
|
||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||
|
|
@ -640,6 +641,9 @@ public class UnifiedGenotyperEngine {
|
|||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
|
||||
modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
|
||||
|
||||
if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
|
||||
modelPrefix = GPSTRING + modelPrefix;
|
||||
|
||||
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||
|
|
@ -648,17 +652,13 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
if ( vcInput.isSNP() ) {
|
||||
// ignore SNPs if the user chose INDEL mode only
|
||||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
|
||||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
|
||||
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
}
|
||||
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
|
||||
// ignore INDELs if the user chose SNP mode only
|
||||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
|
||||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
|
||||
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
// No support for other types yet
|
||||
}
|
||||
|
|
@ -668,7 +668,7 @@ public class UnifiedGenotyperEngine {
|
|||
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
|
||||
}
|
||||
else {
|
||||
models.add(UAC.GLmodel);
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -730,12 +730,19 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
|
||||
|
||||
List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
|
||||
|
||||
// user-specified name
|
||||
String afModelName = UAC.AFmodel.name();
|
||||
|
||||
if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
|
||||
afModelName = GPSTRING + afModelName;
|
||||
|
||||
for (int i = 0; i < afClasses.size(); i++) {
|
||||
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
|
||||
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
|
||||
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
|
||||
if (afModelName.equalsIgnoreCase(key)) {
|
||||
try {
|
||||
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
|
||||
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
|
||||
|
|
|
|||
|
|
@ -73,19 +73,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
|
|||
@Output(required=false, doc="Output bam")
|
||||
protected StingSAMFileWriter writer = null;
|
||||
|
||||
/**
|
||||
* If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool
|
||||
* may run out of memory. We recommend that you additionally tell Java to use a temp directory with plenty of available
|
||||
* space (by setting java.io.tempdir on the command-line).
|
||||
*/
|
||||
@Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false)
|
||||
protected int MAX_RECORDS_IN_RAM = 500000;
|
||||
|
||||
public void initialize() {
|
||||
// set up the output writer
|
||||
if ( writer != null )
|
||||
writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM);
|
||||
}
|
||||
public void initialize() {}
|
||||
|
||||
private void emit(final SAMRecord read) {
|
||||
if ( writer != null )
|
||||
|
|
|
|||
|
|
@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
return true;
|
||||
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
|
||||
// first subset to the samples
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, true);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
|
||||
// now check to see (using EXACT model) whether this should be variant
|
||||
// do we want to apply a prior? maybe user-spec?
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
|
|||
if ( samples == null || samples.isEmpty() )
|
||||
return true;
|
||||
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, false);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
if ( subContext.isPolymorphicInSamples() ) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -500,7 +500,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
|
||||
@Requires({"eval != null", "comp != null"})
|
||||
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
|
||||
// find all of the matching comps
|
||||
if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
|
||||
// if either of these are NO_VARIATION they are LENIENT matches
|
||||
return EvalCompMatchType.LENIENT;
|
||||
|
||||
if ( comp.getType() != eval.getType() )
|
||||
return EvalCompMatchType.NO_MATCH;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
}
|
||||
}
|
||||
|
||||
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc1 != null) updateTiTv(vc1, false);
|
||||
if (vc2 != null) updateTiTv(vc2, true);
|
||||
@Override
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (eval != null)
|
||||
updateTiTv(eval, false);
|
||||
if (comp != null)
|
||||
updateTiTv(comp, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
|||
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
|
||||
for ( final VariantContext c : knownComps ) {
|
||||
// loop over sites, looking for something that matches the type eval
|
||||
if ( eval.getType() == c.getType() ) {
|
||||
if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
|
||||
return KNOWN_STATES;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -197,7 +197,9 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
|
||||
// if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
|
||||
final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
|
||||
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
|
||||
}
|
||||
|
||||
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
|
||||
|
|
@ -262,12 +264,8 @@ public class VariantEvalUtils {
|
|||
// First, filter the VariantContext to represent only the samples for evaluation
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
if (subsetBySample && vc.hasGenotypes()) {
|
||||
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
else
|
||||
vcsub = ensureAnnotations(vc, vc);
|
||||
}
|
||||
if (subsetBySample && vc.hasGenotypes())
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
|
||||
if ((byFilter || !vcsub.isFiltered())) {
|
||||
addMapping(mapping, VariantEval.getAllSampleName(), vcsub);
|
||||
|
|
|
|||
|
|
@ -58,14 +58,11 @@ import java.util.*;
|
|||
* to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
|
||||
* slightly lower quality level.
|
||||
*
|
||||
* <p>
|
||||
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The input raw variants to be recalibrated.
|
||||
* <p>
|
||||
* The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
|
||||
* The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
|
||||
* <p>
|
||||
* The tranches file that was generated by the VariantRecalibrator walker.
|
||||
*
|
||||
|
|
@ -82,6 +79,7 @@ import java.util.*;
|
|||
* --ts_filter_level 99.0 \
|
||||
* -tranchesFile path/to/output.tranches \
|
||||
* -recalFile path/to/output.recal \
|
||||
* -mode SNP \
|
||||
* -o path/to/output.recalibrated.filtered.vcf
|
||||
* </pre>
|
||||
*
|
||||
|
|
@ -198,7 +196,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
|
||||
for( final VariantContext vc : VCs ) {
|
||||
|
||||
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
|
||||
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
|
||||
if( recalDatum == null ) {
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -273,11 +274,37 @@ public class VariantDataManager {
|
|||
}
|
||||
|
||||
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
|
||||
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
|
||||
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
|
||||
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
|
||||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
|
||||
}
|
||||
|
||||
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
|
||||
switch( trainVC.getType() ) {
|
||||
case SNP:
|
||||
case MNP:
|
||||
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP );
|
||||
case INDEL:
|
||||
case MIXED:
|
||||
case SYMBOLIC:
|
||||
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) {
|
||||
switch( mode ) {
|
||||
case SNP:
|
||||
return evalVC.isSNP() || evalVC.isMNP();
|
||||
case INDEL:
|
||||
return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
|
||||
case BOTH:
|
||||
return true;
|
||||
default:
|
||||
throw new ReviewedStingException( "Encountered unknown recal mode: " + mode );
|
||||
}
|
||||
}
|
||||
|
||||
public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) {
|
||||
// we need to sort in coordinate order in order to produce a valid VCF
|
||||
Collections.sort( data, new Comparator<VariantDatum>() {
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
|
|
@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -65,7 +63,7 @@ import java.util.*;
|
|||
* The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set.
|
||||
* One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call.
|
||||
* The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship
|
||||
* between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic
|
||||
* between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the the probability that a SNP is a true genetic
|
||||
* variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided
|
||||
* as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive
|
||||
* error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the
|
||||
|
|
@ -73,15 +71,9 @@ import java.util.*;
|
|||
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
|
||||
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
|
||||
*
|
||||
* <p>
|
||||
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The input raw variants to be recalibrated.
|
||||
|
|
@ -90,7 +82,7 @@ import java.util.*;
|
|||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
|
||||
* A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
|
||||
* <p>
|
||||
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
|
||||
*
|
||||
|
|
@ -102,8 +94,9 @@ import java.util.*;
|
|||
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
|
||||
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
|
||||
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
|
||||
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
|
||||
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
|
||||
* -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
|
||||
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
|
||||
* -mode SNP \
|
||||
* -recalFile path/to/output.recal \
|
||||
* -tranchesFile path/to/output.tranches \
|
||||
* -rscriptFile path/to/output.plots.R
|
||||
|
|
@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Advanced
|
||||
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
|
||||
protected Boolean TRUST_ALL_POLYMORPHIC = false;
|
||||
//@Hidden
|
||||
//@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
|
||||
//protected Boolean PERFORM_PROJECT_CONSENSUS = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -255,7 +245,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
|
||||
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
|
||||
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
|
||||
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
|
||||
if( VariantDataManager.checkVariationClass( vc, VRAC.MODE ) ) {
|
||||
final VariantDatum datum = new VariantDatum();
|
||||
|
||||
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
|
||||
|
|
@ -268,10 +258,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
||||
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
|
||||
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
|
||||
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
||||
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
|
||||
//}
|
||||
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
|
||||
|
||||
mapList.add( datum );
|
||||
|
|
@ -282,12 +268,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
return mapList;
|
||||
}
|
||||
|
||||
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
|
||||
return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && (vc.isIndel() || vc.isMixed()));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
|
|
|
|||
|
|
@ -69,9 +69,14 @@ public class QualityUtils {
|
|||
* @return a probability (0.0 - 1.0)
|
||||
*/
|
||||
static private double qualToErrorProbRaw(int qual) {
|
||||
return qualToErrorProb((double) qual);
|
||||
}
|
||||
|
||||
public static double qualToErrorProb(final double qual) {
|
||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||
}
|
||||
|
||||
|
||||
static public double qualToErrorProb(byte qual) {
|
||||
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -168,6 +168,28 @@ public class PluginManager<PluginType> {
|
|||
String pluginName = getName(pluginClass);
|
||||
pluginsByName.put(pluginName, pluginClass);
|
||||
}
|
||||
|
||||
// sort the plugins so the order of elements is deterministic
|
||||
sortPlugins(plugins);
|
||||
sortPlugins(interfaces);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts, in place, the list of plugins according to getName() on each element
|
||||
*
|
||||
* @param unsortedPlugins
|
||||
*/
|
||||
private final void sortPlugins(final List<Class<? extends PluginType>> unsortedPlugins) {
|
||||
Collections.sort(unsortedPlugins, new ComparePluginsByName());
|
||||
}
|
||||
|
||||
private final class ComparePluginsByName implements Comparator<Class<? extends PluginType>> {
|
||||
@Override
|
||||
public int compare(final Class<? extends PluginType> aClass, final Class<? extends PluginType> aClass1) {
|
||||
String pluginName1 = getName(aClass);
|
||||
String pluginName2 = getName(aClass1);
|
||||
return pluginName1.compareTo(pluginName2);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
|
|||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
|
|||
|
|
@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||
private final static boolean FORBID_SYMBOLICS = false;
|
||||
|
||||
private final static int ALLOWED_MAJOR_VERSION = 2;
|
||||
private final static int MIN_MINOR_VERSION = 1;
|
||||
|
||||
private BCFVersion bcfVersion = null;
|
||||
|
||||
private VCFHeader header = null;
|
||||
|
||||
/**
|
||||
|
|
@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
|
||||
try {
|
||||
// note that this reads the magic as well, and so does double duty
|
||||
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
||||
error("Input stream does not begin with BCF2 magic");
|
||||
bcfVersion = BCFVersion.readBCFVersion(inputStream);
|
||||
if ( bcfVersion == null )
|
||||
error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
|
||||
|
||||
if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
|
||||
error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
|
||||
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
|
||||
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
|
||||
|
||||
logger.info("BCF version " + bcfVersion);
|
||||
|
||||
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
||||
|
||||
|
|
@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
FileInputStream fis = null;
|
||||
try {
|
||||
fis = new FileInputStream(path);
|
||||
return BCF2Utils.startsWithBCF2Magic(fis);
|
||||
final BCFVersion version = BCFVersion.readBCFVersion(fis);
|
||||
return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
|
||||
} catch ( FileNotFoundException e ) {
|
||||
return false;
|
||||
} catch ( IOException e ) {
|
||||
|
|
@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
try {
|
||||
if ( fis != null ) fis.close();
|
||||
} catch ( IOException e ) {
|
||||
; // do nothing
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -202,7 +202,7 @@ public final class BCF2Decoder {
|
|||
return null;
|
||||
else {
|
||||
final String s = new String(bytes, 0, goodLength);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
|
||||
}
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("readByte failure", e);
|
||||
|
|
|
|||
|
|
@ -41,8 +41,6 @@ import java.util.*;
|
|||
* @since 5/12
|
||||
*/
|
||||
public final class BCF2Utils {
|
||||
public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
|
||||
|
||||
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
|
||||
|
||||
public static final int OVERFLOW_ELEMENT_MARKER = 15;
|
||||
|
|
@ -75,68 +73,60 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires("header != null")
|
||||
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
public static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final ArrayList<String> dict = new ArrayList<String>();
|
||||
|
||||
boolean sawPASS = false;
|
||||
// special case the special PASS field which doesn't show up in the FILTER field definitions
|
||||
seen.add(VCFConstants.PASSES_FILTERS_v4);
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4);
|
||||
|
||||
// set up the strings dictionary
|
||||
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
|
||||
if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
|
||||
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
if ( ! seen.contains(idLine.getID())) {
|
||||
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
|
||||
dict.add(idLine.getID());
|
||||
seen.add(idLine.getID());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ( ! sawPASS )
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
@Requires({"nElements >= 0", "type != null"})
|
||||
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
||||
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
||||
return typeByte;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeSize(final byte typeDescriptor) {
|
||||
public static int decodeSize(final byte typeDescriptor) {
|
||||
return (0xF0 & typeDescriptor) >> 4;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeTypeID(final byte typeDescriptor) {
|
||||
public static int decodeTypeID(final byte typeDescriptor) {
|
||||
return typeDescriptor & 0x0F;
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public final static BCF2Type decodeType(final byte typeDescriptor) {
|
||||
public static BCF2Type decodeType(final byte typeDescriptor) {
|
||||
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
|
||||
}
|
||||
|
||||
public final static boolean sizeIsOverflow(final byte typeDescriptor) {
|
||||
public static boolean sizeIsOverflow(final byte typeDescriptor) {
|
||||
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
||||
}
|
||||
|
||||
@Requires("nElements >= 0")
|
||||
public final static boolean willOverflow(final long nElements) {
|
||||
public static boolean willOverflow(final long nElements) {
|
||||
return nElements > MAX_INLINE_ELEMENTS;
|
||||
}
|
||||
|
||||
public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
|
||||
final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
|
||||
stream.read(magicBytes);
|
||||
return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
|
||||
}
|
||||
|
||||
public final static byte readByte(final InputStream stream) {
|
||||
public static byte readByte(final InputStream stream) {
|
||||
// TODO -- shouldn't be capturing error here
|
||||
try {
|
||||
return (byte)(stream.read() & 0xFF);
|
||||
|
|
@ -155,7 +145,7 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires({"strings != null", "strings.size() > 1"})
|
||||
@Ensures("result != null")
|
||||
public static final String collapseStringList(final List<String> strings) {
|
||||
public static String collapseStringList(final List<String> strings) {
|
||||
final StringBuilder b = new StringBuilder();
|
||||
for ( final String s : strings ) {
|
||||
if ( s != null ) {
|
||||
|
|
@ -177,14 +167,14 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
|
||||
@Ensures("result != null")
|
||||
public static final List<String> exploreStringList(final String collapsed) {
|
||||
public static List<String> explodeStringList(final String collapsed) {
|
||||
assert isCollapsedString(collapsed);
|
||||
final String[] exploded = collapsed.substring(1).split(",");
|
||||
return Arrays.asList(exploded);
|
||||
}
|
||||
|
||||
@Requires("s != null")
|
||||
public static final boolean isCollapsedString(final String s) {
|
||||
public static boolean isCollapsedString(final String s) {
|
||||
return s.charAt(0) == ',';
|
||||
}
|
||||
|
||||
|
|
@ -226,7 +216,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final int value) {
|
||||
public static BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
|
|
@ -236,7 +226,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||
public static BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
|
|
@ -262,7 +252,7 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
switch ( t1 ) {
|
||||
case INT8: return t2;
|
||||
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
|
||||
|
|
@ -272,7 +262,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
public static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
|
|
@ -297,7 +287,7 @@ public final class BCF2Utils {
|
|||
* @param o
|
||||
* @return
|
||||
*/
|
||||
public final static List<Object> toList(final Object o) {
|
||||
public static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
|
|
@ -305,7 +295,7 @@ public final class BCF2Utils {
|
|||
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
public static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
return (byte)(readByte(stream));
|
||||
|
|
@ -323,7 +313,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
switch ( type.getSizeInBytes() ) {
|
||||
case 1:
|
||||
encodeStream.write(0xFF & value);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Simple holder for BCF version information
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/2/12
|
||||
* Time: 2:16 PM
|
||||
*/
|
||||
public class BCFVersion {
|
||||
/**
|
||||
* BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2)
|
||||
* and m is the minor version, currently 1
|
||||
*/
|
||||
public static final byte[] MAGIC_HEADER_START = "BCF".getBytes();
|
||||
|
||||
final int majorVersion;
|
||||
final int minorVersion;
|
||||
|
||||
public BCFVersion(int majorVersion, int minorVersion) {
|
||||
this.majorVersion = majorVersion;
|
||||
this.minorVersion = minorVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the major version number of this BCF file
|
||||
*/
|
||||
public int getMajorVersion() {
|
||||
return majorVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the minor version number of this BCF file
|
||||
*/
|
||||
public int getMinorVersion() {
|
||||
return minorVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a new BCFVersion object describing the major and minor version of the BCF file in stream
|
||||
*
|
||||
* Note that stream must be at the very start of the file.
|
||||
*
|
||||
* @param stream
|
||||
* @return a BCFVersion object, or null if stream doesn't contain a BCF file
|
||||
* @throws IOException
|
||||
*/
|
||||
public static BCFVersion readBCFVersion(final InputStream stream) throws IOException {
|
||||
final byte[] magicBytes = new byte[MAGIC_HEADER_START.length];
|
||||
stream.read(magicBytes);
|
||||
if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) {
|
||||
// we're a BCF file
|
||||
final int majorByte = stream.read();
|
||||
final int minorByte = stream.read();
|
||||
return new BCFVersion( majorByte, minorByte );
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions
|
||||
* @param out
|
||||
* @throws IOException
|
||||
*/
|
||||
public void write(final OutputStream out) throws IOException {
|
||||
out.write(MAGIC_HEADER_START);
|
||||
out.write(getMajorVersion() & 0xFF);
|
||||
out.write(getMinorVersion() & 0xFF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion());
|
||||
}
|
||||
}
|
||||
|
|
@ -316,9 +316,9 @@ public class UserException extends ReviewedStingException {
|
|||
|
||||
|
||||
|
||||
public static class MissingWalker extends UserException {
|
||||
public MissingWalker(String walkerName, String message) {
|
||||
super(String.format("Walker %s is not available: %s", walkerName, message));
|
||||
public static class DeprecatedWalker extends UserException {
|
||||
public DeprecatedWalker(String walkerName, String version) {
|
||||
super(String.format("Walker %s is no longer available in the GATK; it has been deprecated since version %s", walkerName, version));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
|
||||
import net.sf.samtools.SAMTag;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
|
|
@ -103,7 +103,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
read.setBaseQualities(null, errorModel);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -41,7 +39,7 @@ public class QuantizationInfo {
|
|||
for (final RecalDatum value : qualTable.getAllValues()) {
|
||||
final RecalDatum datum = value;
|
||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||
qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key
|
||||
qualHistogram[empiricalQual] += datum.getNumObservations(); // add the number of observations for every key
|
||||
}
|
||||
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
||||
quantizeQualityScores(quantizationLevels);
|
||||
|
|
@ -70,15 +68,15 @@ public class QuantizationInfo {
|
|||
}
|
||||
|
||||
public GATKReportTable generateReportTable() {
|
||||
GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
|
||||
quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
|
||||
quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
|
||||
for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
|
||||
quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual);
|
||||
quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
|
||||
quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
|
||||
quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
|
||||
}
|
||||
return quantizedTable;
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of it's covariates.
|
||||
|
|
@ -0,0 +1,305 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number
|
||||
* of reference mismatches seen for that combination of covariates.
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*/
|
||||
@Invariant({
|
||||
"estimatedQReported >= 0.0",
|
||||
"! Double.isNaN(estimatedQReported)",
|
||||
"! Double.isInfinite(estimatedQReported)",
|
||||
"empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED",
|
||||
"! Double.isNaN(empiricalQuality)",
|
||||
"! Double.isInfinite(empiricalQuality)",
|
||||
"numObservations >= 0",
|
||||
"numMismatches >= 0",
|
||||
"numMismatches <= numObservations"
|
||||
})
|
||||
public class RecalDatum {
|
||||
private static final double UNINITIALIZED = -1.0;
|
||||
|
||||
/**
|
||||
* estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||
*/
|
||||
private double estimatedQReported;
|
||||
|
||||
/**
|
||||
* the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||
*/
|
||||
private double empiricalQuality;
|
||||
|
||||
/**
|
||||
* number of bases seen in total
|
||||
*/
|
||||
private long numObservations;
|
||||
|
||||
/**
|
||||
* number of bases seen that didn't match the reference
|
||||
*/
|
||||
private long numMismatches;
|
||||
|
||||
/**
|
||||
* used when calculating empirical qualities to avoid division by zero
|
||||
*/
|
||||
private static final int SMOOTHING_CONSTANT = 1;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Create a new RecalDatum with given observation and mismatch counts, and an reported quality
|
||||
*
|
||||
* @param _numObservations
|
||||
* @param _numMismatches
|
||||
* @param reportedQuality
|
||||
*/
|
||||
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
|
||||
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
|
||||
|
||||
numObservations = _numObservations;
|
||||
numMismatches = _numMismatches;
|
||||
estimatedQReported = reportedQuality;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy copy into this recal datum, overwriting all of this objects data
|
||||
* @param copy
|
||||
*/
|
||||
public RecalDatum(final RecalDatum copy) {
|
||||
this.numObservations = copy.getNumObservations();
|
||||
this.numMismatches = copy.getNumMismatches();
|
||||
this.estimatedQReported = copy.estimatedQReported;
|
||||
this.empiricalQuality = copy.empiricalQuality;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add in all of the data from other into this object, updating the reported quality from the expected
|
||||
* error rate implied by the two reported qualities
|
||||
*
|
||||
* @param other
|
||||
*/
|
||||
public synchronized void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
increment(other.getNumObservations(), other.getNumMismatches());
|
||||
estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations());
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
public synchronized void setEstimatedQReported(final double estimatedQReported) {
|
||||
if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0");
|
||||
if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite");
|
||||
if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
|
||||
|
||||
this.estimatedQReported = estimatedQReported;
|
||||
}
|
||||
|
||||
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
||||
final Random random = new Random();
|
||||
final int nObservations = random.nextInt(maxObservations);
|
||||
final int nErrors = random.nextInt(maxErrors);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
|
||||
return new RecalDatum(nObservations, nErrors, (byte)qual);
|
||||
}
|
||||
|
||||
public final double getEstimatedQReported() {
|
||||
return estimatedQReported;
|
||||
}
|
||||
public final byte getEstimatedQReportedAsByte() {
|
||||
return (byte)(int)(Math.round(getEstimatedQReported()));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Empirical quality score -- derived from the num mismatches and observations
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the error rate (in real space) of this interval, or 0 if there are no obserations
|
||||
* @return the empirical error rate ~= N errors / N obs
|
||||
*/
|
||||
@Ensures("result >= 0.0")
|
||||
public double getEmpiricalErrorRate() {
|
||||
if ( numObservations == 0 )
|
||||
return 0.0;
|
||||
else {
|
||||
// cache the value so we don't call log over and over again
|
||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||
// smoothing is one error and one non-error observation, for example
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
|
||||
return doubleMismatches / doubleObservations;
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
|
||||
if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0");
|
||||
if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite");
|
||||
if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN");
|
||||
|
||||
this.empiricalQuality = empiricalQuality;
|
||||
}
|
||||
|
||||
public final double getEmpiricalQuality() {
|
||||
if (empiricalQuality == UNINITIALIZED)
|
||||
calcEmpiricalQuality();
|
||||
return empiricalQuality;
|
||||
}
|
||||
|
||||
public final byte getEmpiricalQualityAsByte() {
|
||||
return (byte)(Math.round(getEmpiricalQuality()));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality()));
|
||||
}
|
||||
|
||||
public String stringForCSV() {
|
||||
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
|
||||
}
|
||||
|
||||
// /**
|
||||
// * We don't compare the estimated quality reported because it may be different when read from
|
||||
// * report tables.
|
||||
// *
|
||||
// * @param o the other recal datum
|
||||
// * @return true if the two recal datums have the same number of observations, errors and empirical quality.
|
||||
// */
|
||||
// @Override
|
||||
// public boolean equals(Object o) {
|
||||
// if (!(o instanceof RecalDatum))
|
||||
// return false;
|
||||
// RecalDatum other = (RecalDatum) o;
|
||||
// return super.equals(o) &&
|
||||
// MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
||||
// }
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public long getNumObservations() {
|
||||
return numObservations;
|
||||
}
|
||||
|
||||
public synchronized void setNumObservations(final long numObservations) {
|
||||
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||
this.numObservations = numObservations;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
public long getNumMismatches() {
|
||||
return numMismatches;
|
||||
}
|
||||
|
||||
@Requires({"numMismatches >= 0"})
|
||||
public synchronized void setNumMismatches(final long numMismatches) {
|
||||
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||
this.numMismatches = numMismatches;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires({"by >= 0"})
|
||||
public synchronized void incrementNumObservations(final long by) {
|
||||
numObservations += by;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires({"by >= 0"})
|
||||
public synchronized void incrementNumMismatches(final long by) {
|
||||
numMismatches += by;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires({"incObservations >= 0", "incMismatches >= 0"})
|
||||
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
|
||||
public synchronized void increment(final long incObservations, final long incMismatches) {
|
||||
incrementNumObservations(incObservations);
|
||||
incrementNumMismatches(incMismatches);
|
||||
}
|
||||
|
||||
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
|
||||
public synchronized void increment(final boolean isError) {
|
||||
incrementNumObservations(1);
|
||||
if ( isError )
|
||||
incrementNumMismatches(1);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Private implementation helper functions
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
|
||||
*/
|
||||
@Requires("empiricalQuality == UNINITIALIZED")
|
||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||
private synchronized final void calcEmpiricalQuality() {
|
||||
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
/**
|
||||
* calculate the expected number of errors given the estimated Q reported and the number of observations
|
||||
* in this datum.
|
||||
*
|
||||
* @return a positive (potentially fractional) estimate of the number of errors
|
||||
*/
|
||||
@Ensures("result >= 0.0")
|
||||
private double calcExpectedErrors() {
|
||||
return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,309 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 07/27/12
|
||||
*/
|
||||
public class RecalDatumNode<T extends RecalDatum> {
|
||||
private final static boolean USE_CHI2 = true;
|
||||
protected static Logger logger = Logger.getLogger(RecalDatumNode.class);
|
||||
private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY;
|
||||
private final T recalDatum;
|
||||
private double fixedPenalty = UNINITIALIZED;
|
||||
private final Set<RecalDatumNode<T>> subnodes;
|
||||
|
||||
public RecalDatumNode(final T recalDatum) {
|
||||
this(recalDatum, new HashSet<RecalDatumNode<T>>());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return recalDatum.toString();
|
||||
}
|
||||
|
||||
public RecalDatumNode(final T recalDatum, final Set<RecalDatumNode<T>> subnodes) {
|
||||
this(recalDatum, UNINITIALIZED, subnodes);
|
||||
}
|
||||
|
||||
protected RecalDatumNode(final T recalDatum, final double fixedPenalty) {
|
||||
this(recalDatum, fixedPenalty, new HashSet<RecalDatumNode<T>>());
|
||||
}
|
||||
|
||||
protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set<RecalDatumNode<T>> subnodes) {
|
||||
this.recalDatum = recalDatum;
|
||||
this.fixedPenalty = fixedPenalty;
|
||||
this.subnodes = new HashSet<RecalDatumNode<T>>(subnodes);
|
||||
}
|
||||
|
||||
public T getRecalDatum() {
|
||||
return recalDatum;
|
||||
}
|
||||
|
||||
public Set<RecalDatumNode<T>> getSubnodes() {
|
||||
return subnodes;
|
||||
}
|
||||
|
||||
public double getPenalty() {
|
||||
if ( fixedPenalty != UNINITIALIZED )
|
||||
return fixedPenalty;
|
||||
else
|
||||
return calcPenalty();
|
||||
}
|
||||
|
||||
public double calcAndSetFixedPenalty(final boolean doEntireTree) {
|
||||
fixedPenalty = calcPenalty();
|
||||
if ( doEntireTree )
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
sub.calcAndSetFixedPenalty(doEntireTree);
|
||||
return fixedPenalty;
|
||||
}
|
||||
|
||||
public void addSubnode(final RecalDatumNode<T> sub) {
|
||||
subnodes.add(sub);
|
||||
}
|
||||
|
||||
public boolean isLeaf() {
|
||||
return subnodes.isEmpty();
|
||||
}
|
||||
|
||||
public int getNumBranches() {
|
||||
return subnodes.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Total penalty is the sum of leaf node penalties
|
||||
*
|
||||
* This algorithm assumes that penalties have been fixed before pruning, as leaf nodes by
|
||||
* definition have 0 penalty unless they represent a pruned tree with underlying -- but now
|
||||
* pruned -- subtrees
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double totalPenalty() {
|
||||
if ( isLeaf() )
|
||||
return getPenalty();
|
||||
else {
|
||||
double sum = 0.0;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
sum += sub.totalPenalty();
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
public int maxDepth() {
|
||||
int subMax = 0;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
subMax = Math.max(subMax, sub.maxDepth());
|
||||
return subMax + 1;
|
||||
}
|
||||
|
||||
public int minDepth() {
|
||||
if ( isLeaf() )
|
||||
return 1;
|
||||
else {
|
||||
int subMin = Integer.MAX_VALUE;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
subMin = Math.min(subMin, sub.minDepth());
|
||||
return subMin + 1;
|
||||
}
|
||||
}
|
||||
|
||||
public int size() {
|
||||
int size = 1;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
size += sub.size();
|
||||
return size;
|
||||
}
|
||||
|
||||
public int numLeaves() {
|
||||
if ( isLeaf() )
|
||||
return 1;
|
||||
else {
|
||||
int size = 0;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
size += sub.numLeaves();
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
private double calcPenalty() {
|
||||
if ( USE_CHI2 )
|
||||
return calcPenaltyChi2();
|
||||
else
|
||||
return calcPenaltyLog10(getRecalDatum().getEmpiricalErrorRate());
|
||||
}
|
||||
|
||||
private double calcPenaltyChi2() {
|
||||
if ( isLeaf() )
|
||||
return 0.0;
|
||||
else {
|
||||
final long[][] counts = new long[subnodes.size()][2];
|
||||
|
||||
int i = 0;
|
||||
for ( RecalDatumNode<T> subnode : subnodes ) {
|
||||
counts[i][0] = subnode.getRecalDatum().getNumMismatches();
|
||||
counts[i][1] = subnode.getRecalDatum().getNumObservations();
|
||||
i++;
|
||||
}
|
||||
|
||||
final double chi2 = new ChiSquareTestImpl().chiSquare(counts);
|
||||
|
||||
// StringBuilder x = new StringBuilder();
|
||||
// StringBuilder y = new StringBuilder();
|
||||
// for ( int k = 0; k < counts.length; k++) {
|
||||
// if ( k != 0 ) {
|
||||
// x.append(", ");
|
||||
// y.append(", ");
|
||||
// }
|
||||
// x.append(counts[k][0]);
|
||||
// y.append(counts[k][1]);
|
||||
// }
|
||||
// logger.info("x = c(" + x.toString() + ")");
|
||||
// logger.info("y = c(" + y.toString() + ")");
|
||||
// logger.info("chi2 = " + chi2);
|
||||
|
||||
return chi2;
|
||||
//return Math.log10(chi2);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the penalty of this interval, given the overall error rate for the interval
|
||||
*
|
||||
* If the globalErrorRate is e, this value is:
|
||||
*
|
||||
* sum_i |log10(e_i) - log10(e)| * nObservations_i
|
||||
*
|
||||
* each the index i applies to all leaves of the tree accessible from this interval
|
||||
* (found recursively from subnodes as necessary)
|
||||
*
|
||||
* @param globalErrorRate overall error rate in real space against which we calculate the penalty
|
||||
* @return the cost of approximating the bins in this interval with the globalErrorRate
|
||||
*/
|
||||
@Requires("globalErrorRate >= 0.0")
|
||||
@Ensures("result >= 0.0")
|
||||
private double calcPenaltyLog10(final double globalErrorRate) {
|
||||
if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty
|
||||
return 0.0;
|
||||
|
||||
if ( isLeaf() ) {
|
||||
// this is leave node
|
||||
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations();
|
||||
// TODO -- how we can generalize this calculation?
|
||||
// if ( this.qEnd <= minInterestingQual )
|
||||
// // It's free to merge up quality scores below the smallest interesting one
|
||||
// return 0;
|
||||
// else {
|
||||
// return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations();
|
||||
// }
|
||||
} else {
|
||||
double sum = 0;
|
||||
for ( final RecalDatumNode<T> hrd : subnodes)
|
||||
sum += hrd.calcPenaltyLog10(globalErrorRate);
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
public RecalDatumNode<T> pruneToDepth(final int maxDepth) {
|
||||
if ( maxDepth < 1 )
|
||||
throw new IllegalArgumentException("maxDepth < 1");
|
||||
else {
|
||||
final Set<RecalDatumNode<T>> subPruned = new HashSet<RecalDatumNode<T>>(getNumBranches());
|
||||
if ( maxDepth > 1 )
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
subPruned.add(sub.pruneToDepth(maxDepth - 1));
|
||||
return new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, subPruned);
|
||||
}
|
||||
}
|
||||
|
||||
public RecalDatumNode<T> pruneByPenalty(final int maxElements) {
|
||||
RecalDatumNode<T> root = this;
|
||||
|
||||
while ( root.size() > maxElements ) {
|
||||
// remove the lowest penalty element, and continue
|
||||
root = root.removeLowestPenaltyNode();
|
||||
}
|
||||
|
||||
// our size is below the target, so we are good, return
|
||||
return root;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the lowest penalty node in the tree, and return a tree without it
|
||||
*
|
||||
* Note this excludes the current (root) node
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private RecalDatumNode<T> removeLowestPenaltyNode() {
|
||||
final Pair<RecalDatumNode<T>, Double> nodeToRemove = getMinPenaltyNode();
|
||||
logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond());
|
||||
|
||||
final Pair<RecalDatumNode<T>, Boolean> result = removeNode(nodeToRemove.getFirst());
|
||||
|
||||
if ( ! result.getSecond() )
|
||||
throw new IllegalStateException("Never removed any node!");
|
||||
|
||||
final RecalDatumNode<T> oneRemoved = result.getFirst();
|
||||
if ( oneRemoved == null )
|
||||
throw new IllegalStateException("Removed our root node, wow, didn't expect that");
|
||||
return oneRemoved;
|
||||
}
|
||||
|
||||
private Pair<RecalDatumNode<T>, Double> getMinPenaltyNode() {
|
||||
final double myValue = isLeaf() ? Double.MAX_VALUE : getPenalty();
|
||||
Pair<RecalDatumNode<T>, Double> maxNode = new Pair<RecalDatumNode<T>, Double>(this, myValue);
|
||||
|
||||
for ( final RecalDatumNode<T> sub : subnodes ) {
|
||||
final Pair<RecalDatumNode<T>, Double> subFind = sub.getMinPenaltyNode();
|
||||
if ( subFind.getSecond() < maxNode.getSecond() ) {
|
||||
maxNode = subFind;
|
||||
}
|
||||
}
|
||||
|
||||
return maxNode;
|
||||
}
|
||||
|
||||
private Pair<RecalDatumNode<T>, Boolean> removeNode(final RecalDatumNode<T> nodeToRemove) {
|
||||
if ( this == nodeToRemove ) {
|
||||
if ( isLeaf() )
|
||||
throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + nodeToRemove);
|
||||
// node is the thing we are going to remove, but without any subnodes
|
||||
final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty);
|
||||
return new Pair<RecalDatumNode<T>, Boolean>(node, true);
|
||||
} else {
|
||||
// did we remove something in a sub branch?
|
||||
boolean removedSomething = false;
|
||||
|
||||
// our sub nodes with the penalty node removed
|
||||
final Set<RecalDatumNode<T>> sub = new HashSet<RecalDatumNode<T>>(getNumBranches());
|
||||
|
||||
for ( final RecalDatumNode<T> sub1 : subnodes ) {
|
||||
if ( removedSomething ) {
|
||||
// already removed something, just add sub1 back to sub
|
||||
sub.add(sub1);
|
||||
} else {
|
||||
// haven't removed anything yet, so try
|
||||
final Pair<RecalDatumNode<T>, Boolean> maybeRemoved = sub1.removeNode(nodeToRemove);
|
||||
removedSomething = maybeRemoved.getSecond();
|
||||
sub.add(maybeRemoved.getFirst());
|
||||
}
|
||||
}
|
||||
|
||||
final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, sub);
|
||||
return new Pair<RecalDatumNode<T>, Boolean>(node, removedSomething);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -23,11 +23,13 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -39,7 +41,6 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.io.Resource;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -59,7 +60,7 @@ import java.util.*;
|
|||
* This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
|
||||
*/
|
||||
|
||||
public class RecalDataManager {
|
||||
public class RecalUtils {
|
||||
public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments";
|
||||
public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized";
|
||||
public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0";
|
||||
|
|
@ -85,13 +86,108 @@ public class RecalDataManager {
|
|||
|
||||
private static final String SCRIPT_FILE = "BQSR.R";
|
||||
|
||||
private static final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
private static final Pair<String, String> covariateValue = new Pair<String, String>(RecalUtils.COVARIATE_VALUE_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> covariateName = new Pair<String, String>(RecalUtils.COVARIATE_NAME_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> eventType = new Pair<String, String>(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
|
||||
/**
|
||||
* Generates two lists : required covariates and optional covariates based on the user's requests.
|
||||
*
|
||||
* Performs the following tasks in order:
|
||||
* 1. Adds all requierd covariates in order
|
||||
* 2. Check if the user asked to use the standard covariates and adds them all if that's the case
|
||||
* 3. Adds all covariates requested by the user that were not already added by the two previous steps
|
||||
*
|
||||
* @param argumentCollection the argument collection object for the recalibration walker
|
||||
* @return a pair of ordered lists : required covariates (first) and optional covariates (second)
|
||||
*/
|
||||
public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
|
||||
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
||||
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
||||
|
||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
|
||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||
|
||||
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
|
||||
for (String requestedCovariateString : argumentCollection.COVARIATES) {
|
||||
boolean foundClass = false;
|
||||
for (Class<? extends Covariate> covClass : covariateClasses) {
|
||||
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
||||
foundClass = true;
|
||||
if (!requiredClasses.contains(covClass) &&
|
||||
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
||||
try {
|
||||
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
||||
optionalCovariates.add(covariate);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(covClass, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundClass) {
|
||||
throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the required covariates to a covariate list
|
||||
*
|
||||
* Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
|
||||
*
|
||||
* @param classes list of classes to add to the covariate list
|
||||
* @return the covariate list
|
||||
*/
|
||||
private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
|
||||
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
|
||||
if (classes.size() != 2)
|
||||
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
|
||||
|
||||
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
|
||||
dest.add(new QualityScoreCovariate());
|
||||
return dest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the standard covariates to a covariate list
|
||||
*
|
||||
* @param classes list of classes to add to the covariate list
|
||||
* @return the covariate list
|
||||
*/
|
||||
private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
|
||||
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
|
||||
for (Class<?> covClass : classes) {
|
||||
try {
|
||||
final Covariate covariate = (Covariate) covClass.newInstance();
|
||||
dest.add(covariate);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(covClass, e);
|
||||
}
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
public static void listAvailableCovariates(Logger logger) {
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||
|
||||
// Print and exit if that's what was requested
|
||||
logger.info("Available covariates:");
|
||||
for (Class<?> covClass : covariateClasses)
|
||||
logger.info(covClass.getSimpleName());
|
||||
logger.info("");
|
||||
}
|
||||
|
||||
|
||||
public enum SOLID_RECAL_MODE {
|
||||
|
|
@ -152,64 +248,6 @@ public class RecalDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates two lists : required covariates and optional covariates based on the user's requests.
|
||||
*
|
||||
* Performs the following tasks in order:
|
||||
* 1. Adds all requierd covariates in order
|
||||
* 2. Check if the user asked to use the standard covariates and adds them all if that's the case
|
||||
* 3. Adds all covariates requested by the user that were not already added by the two previous steps
|
||||
*
|
||||
* @param argumentCollection the argument collection object for the recalibration walker
|
||||
* @return a pair of ordered lists : required covariates (first) and optional covariates (second)
|
||||
*/
|
||||
public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
|
||||
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
||||
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
||||
|
||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
|
||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||
|
||||
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
|
||||
for (String requestedCovariateString : argumentCollection.COVARIATES) {
|
||||
boolean foundClass = false;
|
||||
for (Class<? extends Covariate> covClass : covariateClasses) {
|
||||
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
|
||||
foundClass = true;
|
||||
if (!requiredClasses.contains(covClass) &&
|
||||
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
|
||||
try {
|
||||
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
|
||||
optionalCovariates.add(covariate);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(covClass, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundClass) {
|
||||
throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
|
||||
}
|
||||
|
||||
public static void listAvailableCovariates(Logger logger) {
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||
|
||||
// Print and exit if that's what was requested
|
||||
logger.info("Available covariates:");
|
||||
for (Class<?> covClass : covariateClasses)
|
||||
logger.info(covClass.getSimpleName());
|
||||
logger.info("");
|
||||
}
|
||||
|
||||
private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||
int reportTableIndex = 0;
|
||||
|
|
@ -272,8 +310,8 @@ public class RecalDataManager {
|
|||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations());
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches());
|
||||
|
||||
rowIndex++;
|
||||
}
|
||||
|
|
@ -320,7 +358,7 @@ public class RecalDataManager {
|
|||
files.getFirst().close();
|
||||
|
||||
final RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class));
|
||||
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
|
||||
executor.addArgs(csvFileName.getAbsolutePath());
|
||||
executor.addArgs(plotFileName.getAbsolutePath());
|
||||
executor.exec();
|
||||
|
|
@ -480,14 +518,14 @@ public class RecalDataManager {
|
|||
*/
|
||||
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
||||
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if (attr != null) {
|
||||
byte[] colorSpace;
|
||||
if (attr instanceof String)
|
||||
colorSpace = ((String) attr).getBytes();
|
||||
else
|
||||
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
|
||||
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
|
|
@ -501,7 +539,7 @@ public class RecalDataManager {
|
|||
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
|
||||
prevBase = readBases[i];
|
||||
}
|
||||
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
||||
read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
||||
}
|
||||
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
|
||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||
|
|
@ -545,7 +583,7 @@ public class RecalDataManager {
|
|||
* @return Returns true if the base was inconsistent with the color space
|
||||
*/
|
||||
public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
|
||||
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG);
|
||||
if (attr != null) {
|
||||
final byte[] inconsistency = (byte[]) attr;
|
||||
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
|
||||
|
|
@ -691,40 +729,4 @@ public class RecalDataManager {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds the required covariates to a covariate list
|
||||
*
|
||||
* Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
|
||||
*
|
||||
* @param classes list of classes to add to the covariate list
|
||||
* @return the covariate list
|
||||
*/
|
||||
private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
|
||||
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
|
||||
if (classes.size() != 2)
|
||||
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
|
||||
|
||||
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
|
||||
dest.add(new QualityScoreCovariate());
|
||||
return dest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the standard covariates to a covariate list
|
||||
*
|
||||
* @param classes list of classes to add to the covariate list
|
||||
* @return the covariate list
|
||||
*/
|
||||
private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
|
||||
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
|
||||
for (Class<?> covClass : classes) {
|
||||
try {
|
||||
final Covariate covariate = (Covariate) covClass.newInstance();
|
||||
dest.add(covariate);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(covClass, e);
|
||||
}
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -33,13 +34,13 @@ public class RecalibrationReport {
|
|||
public RecalibrationReport(final File RECAL_FILE) {
|
||||
final GATKReport report = new GATKReport(RECAL_FILE);
|
||||
|
||||
argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
RAC = initializeArgumentCollectionTable(argumentTable);
|
||||
|
||||
GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
|
||||
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
|
||||
quantizationInfo = initializeQuantizationTable(quantizedTable);
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
|
|
@ -57,13 +58,13 @@ public class RecalibrationReport {
|
|||
for (Covariate cov : requestedCovariates)
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE)));
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
|
||||
|
||||
parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
|
||||
parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
|
||||
|
||||
parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
|
||||
parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
|
||||
|
||||
parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
|
||||
parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -85,7 +86,7 @@ public class RecalibrationReport {
|
|||
private int countReadGroups(final GATKReportTable reportTable) {
|
||||
Set<String> readGroups = new HashSet<String>();
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ )
|
||||
readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString());
|
||||
readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString());
|
||||
return readGroups.size();
|
||||
}
|
||||
|
||||
|
|
@ -139,17 +140,17 @@ public class RecalibrationReport {
|
|||
\ */
|
||||
private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
|
||||
final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
||||
final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
|
||||
final int covIndex = optionalCovariateIndexes.get(covName);
|
||||
final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
||||
final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
|
||||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
|
||||
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempCOVarray[3] = event.index;
|
||||
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
|
|
@ -164,11 +165,11 @@ public class RecalibrationReport {
|
|||
*/
|
||||
private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> qualTable) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempQUALarray[2] = event.index;
|
||||
|
||||
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||
|
|
@ -183,9 +184,9 @@ public class RecalibrationReport {
|
|||
*/
|
||||
private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> rgTable) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempRGarray[1] = event.index;
|
||||
|
||||
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||
|
|
@ -193,13 +194,13 @@ public class RecalibrationReport {
|
|||
}
|
||||
|
||||
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
|
||||
final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
final long nObservations = (Long) reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
|
||||
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
|
||||
datum.setEstimatedQReported(estimatedQReported);
|
||||
|
|
@ -218,8 +219,8 @@ public class RecalibrationReport {
|
|||
final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final byte originalQual = (byte)i;
|
||||
final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
final byte quantizedQual = Byte.parseByte(quantizedObject.toString());
|
||||
final long quantizedCount = Long.parseLong(countObject.toString());
|
||||
quals[originalQual] = quantizedQual;
|
||||
|
|
@ -239,7 +240,7 @@ public class RecalibrationReport {
|
|||
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final String argument = table.get(i, "Argument").toString();
|
||||
Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
if (value.equals("null"))
|
||||
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
||||
|
||||
|
|
@ -250,10 +251,10 @@ public class RecalibrationReport {
|
|||
RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
|
||||
|
||||
else if (argument.equals("solid_recal_mode"))
|
||||
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
|
||||
RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value);
|
||||
|
||||
else if (argument.equals("solid_nocall_strategy"))
|
||||
RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
|
||||
RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
|
||||
|
||||
else if (argument.equals("mismatches_context_size"))
|
||||
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||
|
|
@ -307,7 +308,7 @@ public class RecalibrationReport {
|
|||
}
|
||||
|
||||
public void output(PrintStream output) {
|
||||
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
}
|
||||
|
||||
public RecalibrationArgumentCollection getRAC() {
|
||||
|
|
@ -25,9 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -23,8 +23,10 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
|
|
@ -89,8 +91,3 @@ public interface Covariate {
|
|||
public int maximumKeyValue();
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {}
|
||||
|
||||
interface StandardCovariate extends Covariate {}
|
||||
|
||||
interface ExperimentalCovariate extends Covariate {}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public interface ExperimentalCovariate extends Covariate {}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public interface RequiredCovariate extends Covariate {}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public interface StandardCovariate extends Covariate {}
|
||||
|
|
@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -476,7 +476,6 @@ public class AlignmentUtils {
|
|||
}
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
if (!isDeletion) {
|
||||
alignmentPos += elementLength;
|
||||
} else {
|
||||
|
|
@ -498,6 +497,7 @@ public class AlignmentUtils {
|
|||
break;
|
||||
case H:
|
||||
case P:
|
||||
case N:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
||||
|
|
@ -516,16 +516,13 @@ public class AlignmentUtils {
|
|||
final int elementLength = ce.getLength();
|
||||
|
||||
switch (ce.getOperator()) {
|
||||
case I:
|
||||
case S:
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
alignmentLength += elementLength;
|
||||
break;
|
||||
case M:
|
||||
alignmentLength += elementLength;
|
||||
break;
|
||||
case I:
|
||||
case S:
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* in this VC is returned as the set of alleles in the subContext, even if
|
||||
* some of those alleles aren't in the samples
|
||||
*
|
||||
* WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING?
|
||||
*
|
||||
* @param sampleNames the sample names
|
||||
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
|
||||
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default
|
||||
* @return new VariantContext subsetting to just the given samples
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
|
||||
return this; // fast path when you don't have any work to do
|
||||
} else {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
|
|
@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true
|
||||
*
|
||||
* @param sampleNames
|
||||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromSamples(final Set<String> sampleNames) {
|
||||
return subContextFromSamples(sampleNames, true);
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSample(String sampleName) {
|
||||
return subContextFromSamples(Collections.singleton(sampleName), true);
|
||||
return subContextFromSamples(Collections.singleton(sampleName));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -83,6 +84,9 @@ import java.util.*;
|
|||
* @since 06/12
|
||||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
public static final int MAJOR_VERSION = 2;
|
||||
public static final int MINOR_VERSION = 1;
|
||||
|
||||
/**
|
||||
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
|
||||
* is found in the input VC. This can be very dangerous as the genotype encoding
|
||||
|
|
@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
writer.close();
|
||||
|
||||
final byte[] headerBytes = capture.toByteArray();
|
||||
outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
|
||||
new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
|
||||
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
|
||||
outputStream.write(headerBytes);
|
||||
} catch (IOException e) {
|
||||
|
|
|
|||
|
|
@ -282,12 +282,12 @@ public abstract class BaseTest {
|
|||
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
|
||||
|
||||
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
|
||||
Assert.assertTrue(actual instanceof Double);
|
||||
Assert.assertTrue(actual instanceof Double, "Not a double");
|
||||
assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
|
||||
}
|
||||
|
||||
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
|
||||
Assert.assertTrue(actual instanceof Double);
|
||||
Assert.assertTrue(actual instanceof Double, "Not a double");
|
||||
assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
|
||||
}
|
||||
|
||||
|
|
@ -303,13 +303,13 @@ public abstract class BaseTest {
|
|||
|
||||
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
|
||||
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
|
||||
Assert.assertTrue(Double.isNaN(actual));
|
||||
Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
|
||||
else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
|
||||
Assert.assertTrue(Double.isInfinite(actual));
|
||||
Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not");
|
||||
else {
|
||||
final double delta = Math.abs(actual - expected);
|
||||
final double ratio = Math.abs(actual / expected - 1.0);
|
||||
Assert.assertTrue(delta < tolerance || ratio < tolerance);
|
||||
Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,9 +24,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
import static org.testng.Assert.fail;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
|
|
@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
|
|
@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
|
||||
}
|
||||
}
|
||||
|
||||
/** Test that we clear program records when requested */
|
||||
@Test
|
||||
public void testRemoveProgramRecords() {
|
||||
logger.warn("Executing testRemoveProgramRecords");
|
||||
|
||||
// setup the data
|
||||
readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
|
||||
|
||||
// use defaults
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
false);
|
||||
|
||||
List<SAMProgramRecord> defaultProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
|
||||
|
||||
boolean removeProgramRecords = false;
|
||||
data = new SAMDataSource(readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
|
||||
List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
||||
|
||||
removeProgramRecords = true;
|
||||
data = new SAMDataSource(readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
|
||||
List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -33,15 +34,15 @@ public class BQSRGathererUnitTest {
|
|||
for (GATKReportTable originalTable : originalReport.getTables()) {
|
||||
GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
|
||||
List<String> columnsToTest = new LinkedList<String>();
|
||||
columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL
|
||||
columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||
if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL
|
||||
columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1);
|
||||
}
|
||||
|
||||
else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) {
|
||||
columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) {
|
||||
columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -38,13 +38,17 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class ArtificialReadPileupTestProvider {
|
||||
final String refBases = "ACAGAGCTGACCCTCCCTCCCCTCTCCCAGTGCAACAGCACGGGCGGCGACTGCTTTTACCGAGGCTACACGTCAGGCGTGGCGGCTGTCCAGGACTGGTACCACTTCCACTATGTGGATCTCTGCTGAGGACCAGGAAAGCCAGCACCCGCAGAGACTCTTCCCCAGTGCTCCATACGATCACCATTCTCTGCAGAAGGTCAGACGTCACTGGTGGCCCCCCAGCCTCCTCAGCAGGGAAGGATACTGTCCCGCAGATGAGATGAGCGAGAGCCGCCAGACCCACGTGACGCTGCACGACATCGACCCTCAGGCCTTGGACCAGCTGGTGCAGTTTGCCTACACGGCTGAGATTGTGGTGGGCGAGGGC";
|
||||
final int contigStart = 1;
|
||||
final int contigStop = 10;
|
||||
final int contigStop = refBases.length();
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
|
||||
// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
||||
final String artificialContig = "chr1";
|
||||
|
|
@ -54,16 +58,18 @@ public class ArtificialReadPileupTestProvider {
|
|||
final int artificialMappingQuality = 60;
|
||||
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
|
||||
List<SAMReadGroupRecord> sampleRGs;
|
||||
|
||||
final String refBases = "AGGATACTGT";
|
||||
List<String> sampleNames = new ArrayList<String>();
|
||||
private String sampleName(int i) { return sampleNames.get(i); }
|
||||
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
||||
public final int offset = 5;
|
||||
public final int locStart = 105; // start position where we desire artificial variant
|
||||
private final int readLength = 10; // desired read length in pileup
|
||||
public final int readOffset = 4;
|
||||
private final int readStart = locStart - readOffset;
|
||||
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
||||
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
||||
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
||||
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,locStart,locStart);
|
||||
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,locStart-100,locStart+100);
|
||||
public final String windowBases = refBases.substring(locStart-100-1,locStart+100);
|
||||
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,windowBases.getBytes());
|
||||
|
||||
byte BASE_QUAL = 50;
|
||||
|
||||
|
|
@ -96,25 +102,28 @@ public class ArtificialReadPileupTestProvider {
|
|||
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
|
||||
return getAlignmentContextFromAlleles(eventLength, altBases, numReadsPerAllele, false, BASE_QUAL);
|
||||
}
|
||||
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele,
|
||||
boolean addBaseErrors, int phredScaledBaseErrorRate) {
|
||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(final int eventLength,
|
||||
final String altBases,
|
||||
final int[] numReadsPerAllele,
|
||||
final boolean addBaseErrors,
|
||||
final int phredScaledBaseErrorRate) {
|
||||
final String refChar = new String(new byte[]{referenceContext.getBase()});
|
||||
|
||||
String refAllele, altAllele;
|
||||
if (eventLength == 0) {
|
||||
// SNP case
|
||||
refAllele = new String(new byte[]{referenceContext.getBase()});
|
||||
altAllele = new String(altBases.substring(0,1));
|
||||
refAllele = refChar;
|
||||
altAllele = altBases.substring(0,1);
|
||||
|
||||
} else if (eventLength>0){
|
||||
// insertion
|
||||
refAllele = "";
|
||||
altAllele = altBases.substring(0,eventLength);
|
||||
refAllele = refChar;
|
||||
altAllele = refChar+altBases/*.substring(0,eventLength)*/;
|
||||
}
|
||||
else {
|
||||
// deletion
|
||||
refAllele = refBases.substring(offset,offset+Math.abs(eventLength));
|
||||
altAllele = "";
|
||||
refAllele = new String(referenceContext.getForwardBases()).substring(0,Math.abs(eventLength)+1);
|
||||
altAllele = refChar;
|
||||
}
|
||||
|
||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||
|
|
@ -138,22 +147,21 @@ public class ArtificialReadPileupTestProvider {
|
|||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
|
||||
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
int offset = (contigStop-contigStart+1)/2;
|
||||
int refAlleleLength = refAllele.length();
|
||||
final int refAlleleLength = refAllele.length();
|
||||
|
||||
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
|
||||
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
|
||||
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
|
||||
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
|
||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||
}
|
||||
|
||||
private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
|
||||
private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
|
||||
|
||||
int alleleLength = allele.length();
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
|
||||
int readCounter = 0;
|
||||
for ( int d = 0; d < numReadsPerAllele; d++ ) {
|
||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||
byte[] readBases = trueHaplotype(allele, refAlleleLength, readLength);
|
||||
if (addErrors)
|
||||
addBaseErrors(readBases, phredScaledErrorRate);
|
||||
|
||||
|
|
@ -165,20 +173,20 @@ public class ArtificialReadPileupTestProvider {
|
|||
read.setReadBases(readBases);
|
||||
read.setReadName(artificialReadName+readCounter++);
|
||||
|
||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||
boolean isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
boolean isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
|
||||
int eventLength = alleleLength - refAlleleLength;
|
||||
if (isReference)
|
||||
read.setCigarString(readBases.length + "M");
|
||||
else {
|
||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
if (isBeforeDeletion || isBeforeInsertion)
|
||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-offset)+"M");
|
||||
read.setCigarString((readOffset+1)+"M"+ Math.abs(eventLength) + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-readOffset)+"M");
|
||||
else // SNP case
|
||||
read.setCigarString(readBases.length+"M");
|
||||
}
|
||||
|
||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||
read.setReadPairedFlag(false);
|
||||
read.setAlignmentStart(readStart);
|
||||
read.setMappingQuality(artificialMappingQuality);
|
||||
|
|
@ -187,16 +195,25 @@ public class ArtificialReadPileupTestProvider {
|
|||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||
|
||||
|
||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||
pileupElements.add(new PileupElement(read,readOffset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases,Math.abs(eventLength)));
|
||||
}
|
||||
|
||||
return pileupElements;
|
||||
}
|
||||
|
||||
private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) {
|
||||
/**
|
||||
* Create haplotype with desired allele and reference context
|
||||
* @param allele Desired allele string
|
||||
* @param refAlleleLength Length of reference allele.
|
||||
* @param desiredLength Desired haplotype length
|
||||
* @return String with haplotype formed by (prefix)+allele bases + postfix
|
||||
*/
|
||||
private byte[] trueHaplotype(final String allele, final int refAlleleLength, final int desiredLength) {
|
||||
// create haplotype based on a particular allele
|
||||
String prefix = refBases.substring(0, offset);
|
||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||
final int startIdx= locStart - readOffset-1;
|
||||
|
||||
final String prefix = refBases.substring(startIdx, locStart-1);
|
||||
final String postfix = refBases.substring(locStart+refAlleleLength-1,startIdx + desiredLength);
|
||||
|
||||
return (prefix+allele+postfix).getBytes();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@ import org.testng.annotations.Test;
|
|||
*/
|
||||
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||
|
||||
final String refBases = "AGGATACTGT";
|
||||
final int nSamples = 1;
|
||||
final int[] numReadsPerAllele = new int[]{10,10};
|
||||
final String SAMPLE_PREFIX = "sample";
|
||||
|
|
@ -65,7 +64,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testBasicConsensusCounts() {
|
||||
// 4 inserted bases, min cnt = 10
|
||||
String altBases = "CCTCCTGAGA";
|
||||
String altBases = "CCTC";
|
||||
int eventLength = 4;
|
||||
List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
|
|
@ -73,13 +72,11 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
|
||||
|
||||
|
||||
|
||||
//altBases = "CCTCMTGAGA";
|
||||
|
||||
// test deletions
|
||||
eventLength = 3;
|
||||
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
||||
Assert.assertEquals(alleles.get(0).getBaseString().substring(1,eventLength), new String(pileupProvider.getReferenceContext().getForwardBases()).substring(1,eventLength));
|
||||
|
||||
// same with min Reads = 11
|
||||
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
||||
|
|
@ -92,14 +89,14 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(alleles.size(),0);
|
||||
|
||||
// test N's in insertions
|
||||
altBases = "CCTCNTGAGA";
|
||||
altBases = "CCTC";
|
||||
eventLength = 4;
|
||||
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
|
||||
Assert.assertEquals(alleles.get(1).getBaseString().substring(1,eventLength+1), altBases);
|
||||
|
||||
altBases = "CCTCNTGAGA";
|
||||
altBases = "CCTCN";
|
||||
eventLength = 5;
|
||||
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
|
|
|
|||
|
|
@ -373,13 +373,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing SnpEff
|
||||
// testing MinIndelFraction
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
|
||||
+ "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
|
||||
|
||||
|
||||
@Test
|
||||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -403,4 +403,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing Ns in CIGAR
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
|
||||
Arrays.asList("22c9fd65ce3298bd7fbf400c9c209f29"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
|
||||
public class VariantEvalIntegrationTest extends WalkerTest {
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
|
||||
private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
|
||||
|
|
@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
|
||||
Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
|
||||
}
|
||||
|
|
@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
|
||||
Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"62f81e7d2082fbc71cae0101c27fefad", // tranches
|
||||
"b9709e4180e56abc691b208bd3e8626c", // recal file
|
||||
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
|
||||
"f360ce3eb2b0b887301be917a9843e2b", // tranches
|
||||
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
|
||||
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
|
|||
|
|
@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
|
||||
final String collapsed = BCF2Utils.collapseStringList(strings);
|
||||
Assert.assertEquals(collapsed, expected);
|
||||
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
|
||||
Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab"));
|
||||
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
String testVCF = privateTestDir + "ex2.vcf";
|
||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f"));
|
||||
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.ContextCovariate;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -41,7 +43,7 @@ public class ReadCovariatesUnitTest {
|
|||
requestedCovariates[2] = coCov;
|
||||
requestedCovariates[3] = cyCov;
|
||||
|
||||
ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
|
||||
// check that the length is correct
|
||||
Assert.assertEquals(rc.getMismatchesKeySet().length, length);
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.ReadGroupCovariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -72,7 +73,7 @@ public class RecalibrationReportUnitTest {
|
|||
|
||||
final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
|
||||
int nKeys = 0; // keep track of how many keys were produced
|
||||
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
|
||||
final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
|
||||
final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
|
|
@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
public void run(final VariantContext vc) {
|
||||
if ( samples == null )
|
||||
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
VariantContext sub = vc.subContextFromSamples(samples, true);
|
||||
VariantContext sub = vc.subContextFromSamples(samples);
|
||||
sub.getNSamples();
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -347,6 +347,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.knownSites ++= qscript.dbSNP
|
||||
this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.disable_indel_quals = true
|
||||
this.out = outRecalFile
|
||||
if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform
|
||||
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
|
||||
|
|
@ -359,7 +360,6 @@ class DataProcessingPipeline extends QScript {
|
|||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
|
||||
this.input_file :+= inBam
|
||||
this.BQSR = inRecalFile
|
||||
this.disable_indel_quals = true
|
||||
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
|
||||
this.out = outBam
|
||||
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
|
||||
|
|
|
|||
|
|
@ -166,6 +166,7 @@ class PacbioProcessingPipeline extends QScript {
|
|||
this.knownSites :+= dbSNP
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.disable_indel_quals = true
|
||||
this.out = outRecalFile
|
||||
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.jobName = queueLogDir + outRecalFile + ".covariates"
|
||||
|
|
@ -178,7 +179,6 @@ class PacbioProcessingPipeline extends QScript {
|
|||
this.input_file :+= inBam
|
||||
this.BQSR = inRecalFile
|
||||
this.out = outBam
|
||||
this.disable_indel_quals = true
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ class DataProcessingPipelineTest {
|
|||
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf",
|
||||
" -test ",
|
||||
" -p " + projectName).mkString
|
||||
spec.fileMD5s += testOut -> "0f0b32e94640a8d548b4c1134ad4c075"
|
||||
spec.fileMD5s += testOut -> "60d39ae909fdd049920b54e0965b6d3c"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +60,7 @@ class DataProcessingPipelineTest {
|
|||
" -bwa /home/unix/carneiro/bin/bwa",
|
||||
" -bwape ",
|
||||
" -p " + projectName).mkString
|
||||
spec.fileMD5s += testOut -> "6b4f13d22b45d7d617ee959fdc278ed2"
|
||||
spec.fileMD5s += testOut -> "61ca3237afdfabf78ee27a5bb80dae59"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue