Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-08-03 13:14:43 -04:00
commit c3b6e2b143
101 changed files with 1603 additions and 787 deletions

View File

@ -52,7 +52,7 @@
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
<!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
<!-- Dependencies for the html walker documention -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {

View File

@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;

View File

@ -68,10 +68,10 @@ public class ErrorModel {
break;
}
}
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
if (refSampleVC.isIndel()) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
}
@ -96,7 +96,8 @@ public class ErrorModel {
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
if (!haplotypeMap.isEmpty())
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
}
int idx = 0;
for (PileupElement refPileupElement : refSamplePileup) {
@ -108,7 +109,7 @@ public class ErrorModel {
if (DEBUG) System.out.println(m);
isMatch |= m;
}
if (refSampleVC.isIndel()) {
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
if (!isInformativeElement(perAlleleLikelihoods))
@ -173,10 +174,10 @@ public class ErrorModel {
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
if (allele.isReference()) {
// for a ref allele, any base mismatch or new indel is a mismatch.
if(allele.getBases().length>0 )
if(allele.getBases().length>0)
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
// could clean up/simplify this when unpadding is removed
return (pileupElement.getBase() == refBase);
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
else
// either null allele to compare, or ref/alt lengths are different (indel by definition).
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC;
@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false;
protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
ploidy = UAC.samplePloidy;
this.UAC = UAC;
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0)
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
return newPool;
}
// todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
// todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
/**
*
* @param set ExactACset holding conformation to be computed
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
continue;
PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
}
@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next();
}
PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone();
while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
// for current conformation, get all possible ways to break vector K into two components G1 and G2
final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
while (innerIterator.hasNext()) {
// check if breaking current conformation into g1 and g2 is feasible.
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( numOriginalAltAlleles == numNewAltAlleles) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// find the genotype with maximum likelihoods
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import java.util.*;
public abstract class PoolGenotypeLikelihoods {
public abstract class GeneralPloidyGenotypeLikelihoods {
protected final int numChromosomes;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {
private static final boolean FAST_GL_COMPUTATION = true;
// constructor with given logPL elements
public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
this.alleles = alleles;
this.nAlleles = alleles.size();
numChromosomes = ploidy;
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
} else {
if (logLikelihoods.length != likelihoodDim)
throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
}
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
final int numAlleles = currentState.length;
final int ploidy = restrictSumTo;
linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
}
else
throw new ReviewedStingException("BUG: Not supported");
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
double[] newPLs = new double[newPLSize];
@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
newCount[idx] = pVec[permutationKey[idx]];
// get corresponding index from new count
int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
newPLs[outputIdx] = pl;
if (VERBOSE) {
System.out.println("Old Key:"+Arrays.toString(pVec));

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
//protected Set<String> laneIDs;
public enum Model {
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
final protected UnifiedArgumentCollection UAC;
protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC,logger);
this.UAC = UAC;
@ -137,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
protected static class PoolGenotypeData {
public final String name;
public final PoolGenotypeLikelihoods GL;
public final GeneralPloidyGenotypeLikelihoods GL;
public final int depth;
public final List<Allele> alleles;
public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
this.name = name;
this.GL = GL;
this.depth = depth;
@ -236,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
// create the GenotypeLikelihoods object
final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
// actually compute likelihoods
final int nGoodBases = GL.add(pileup, UAC);
if ( nGoodBases > 0 )
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array
final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
allAlleles, alleles);
// normalize in log space so that max element is zero.
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
Abstract methods - must be implemented in derived classes
*/
protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,

View File

@ -18,7 +18,7 @@ import java.util.*;
* Time: 10:06 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
final PairHMMIndelErrorModel pairModel;
final LinkedHashMap<Allele, Haplotype> haplotypeMap;
final ReferenceContext refContext;
@ -27,14 +27,14 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
final byte refBase;
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.pairModel = pairModel;
this.haplotypeMap = haplotypeMap;

View File

@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel;
@ -59,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
*/
protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
super(UAC, logger);
@ -69,14 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation){
return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
@ -96,6 +94,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
haplotypeMap.clear();
}
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
// sanity check: if haplotype map couldn't be created, clear allele list
if (haplotypeMap.isEmpty())
alleles.clear();
return alleles;
}

View File

@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -22,7 +23,7 @@ import static java.lang.Math.pow;
* and posteriors given a pile of bases and quality scores
*
*/
public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
final List<Allele> myAlleles;
final int[] alleleIndices;
@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
* @param useBQAedPileup Use BAQed pileup
* @param ignoreLaneInformation If true, lane info is ignored
*/
public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.useBAQedPileup = useBQAedPileup;
myAlleles = new ArrayList<Allele>(alleles);
refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref!
Allele refAllele = alleles.get(0);
//sanity check: by construction, first allele should ALWAYS be the reference alleles
if (!refAllele.isReference())
throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
if (myAlleles.size() < BaseUtils.BASES.length) {
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,

View File

@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation) {
return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,

View File

@ -241,6 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling

View File

@ -19,7 +19,7 @@ import java.util.Arrays;
* Time: 7:44 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolAFCalculationModelUnitTest extends BaseTest {
public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors
PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));

View File

@ -41,7 +41,7 @@ import java.io.PrintStream;
import java.util.*;
public class PoolGenotypeLikelihoodsUnitTest {
public class GeneralPloidyGenotypeLikelihoodsUnitTest {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
@ -60,7 +60,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
public void testStoringLikelihoodElements() {
// basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
// basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
int ploidy = 20;
int numAlleles = 4;
@ -78,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int k=0; k < gls.length; k++)
gls[k]= (double)k;
PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
double[] glnew = gl.getLikelihoods();
Assert.assertEquals(gls, glnew);
@ -90,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int ploidy = 2; ploidy < 10; ploidy++) {
for (int nAlleles = 2; nAlleles < 10; nAlleles++)
Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
}
@ -102,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create iterator, compare linear index given by iterator with closed form function
int numAlleles = 4;
int ploidy = 2;
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
while(iterator.hasNext()) {
System.out.format("\n%d:",iterator.getLinearIndex());
@ -111,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
System.out.format("%d ",aa);
int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
System.out.format("Computed idx = %d\n",computedIdx);
iterator.next();
}
@ -140,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
allelesToSubset.add(Allele.create("A",false));
allelesToSubset.add(Allele.create("C",false));
double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
originalAlleles, allelesToSubset);
@ -170,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testIndexIterator() {
int[] seed = new int[]{1,2,3,4};
PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
@ -228,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
while(iterator.hasNext()) {
int[] a = iterator.getCurrentVector();
int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
if (VERBOSE) {
System.out.format("%d:",iterator.getLinearIndex());
for (int i=0; i < seed.length; i++)
@ -289,13 +289,11 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
// TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
// TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
@Test (enabled = false)
@Test
public void testIndelErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte refByte = refPileupTestProvider.getRefByte();
final String altBases = (char)refByte + "TCA";
final String altBases = "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List<Allele> trueAlleles = new ArrayList<Allele>();
trueAlleles.add(Allele.create(refByte, true));
@ -316,7 +314,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise
// CASE 1: Test HET insertion
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
@ -393,6 +391,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte refByte = readPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
final Set<String> laneIDs = new TreeSet<String>();
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
@ -409,23 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (String laneID : laneIDs)
noisyErrorModels.put(laneID, Q30ErrorModel);
final int refIdx = 0;
int altIdx = 2;
// ref allele must be first
allAlleles.add(Allele.create(refByte, true));
// all first ref allele
allAlleles.add(Allele.create(refByte,true));
for (byte b: BaseUtils.BASES) {
if (refByte != b) {
if (b == altByte)
altIdx = allAlleles.size();
if (refByte != b)
allAlleles.add(Allele.create(b, false));
}
}
final int refIdx = 0;
int altIdx = -1;
for (int k=0; k < allAlleles.size(); k++)
if (altByte == allAlleles.get(k).getBases()[0]) {
altIdx = k;
break;
}
PrintStream out = null;
if (SIMULATE_NOISY_PILEUP) {
try {
out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
out = new PrintStream(new File("GLUnitTest.table"));
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
}
catch (Exception e) {}
@ -449,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
if (VERBOSE) {
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
@ -478,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
mlPair = noisyGL.getMostLikelyACCount();

View File

@ -12,33 +12,33 @@ import org.testng.annotations.Test;
* Time: 11:28 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolCallerIntegrationTest extends WalkerTest {
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
final static String REF = b37KGReference;
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
final String REFSAMPLE_NAME = "NA12878";
final String MTINTERVALS = "MT";
final String MTINTERVALS = "MT:1-3000";
final String LSVINTERVALS = "20:40,000,000-41,000,000";
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
private void PC_MT_Test(String bam, String args, String name, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
@ -46,33 +46,33 @@ public class PoolCallerIntegrationTest extends WalkerTest {
@Test
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d");
}
@Test
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3");
}
@Test
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706");
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b");
}
@Test
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29");
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9");
}
@Test
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7");
}
@Test
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a");
}
}

View File

@ -1,4 +1,5 @@
library("ggplot2")
library("tools") #For compactPDF in R 2.13+
args <- commandArgs(TRUE)
data <- read.csv(args[1])

View File

@ -130,12 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
}

View File

@ -274,6 +274,38 @@ public class GenomeAnalysisEngine {
//return result;
}
// TODO -- Let's move this to a utility class in unstable - but which one?
// **************************************************************************************
// * Handle Deprecated Walkers *
// **************************************************************************************
// Mapping from walker name to major version number where the walker first disappeared
private static Map<String, String> deprecatedGATKWalkers = new HashMap<String, String>();
static {
deprecatedGATKWalkers.put("CountCovariates", "2.0");
deprecatedGATKWalkers.put("TableRecalibration", "2.0");
}
/**
* Utility method to check whether a given walker has been deprecated in a previous GATK release
*
* @param walkerName the walker class name (not the full package) to check
*/
public static boolean isDeprecatedWalker(final String walkerName) {
return deprecatedGATKWalkers.containsKey(walkerName);
}
/**
* Utility method to check whether a given walker has been deprecated in a previous GATK release
*
* @param walkerName the walker class name (not the full package) to check
*/
public static String getDeprecatedMajorVersionNumber(final String walkerName) {
return deprecatedGATKWalkers.get(walkerName);
}
// **************************************************************************************
/**
* Retrieves an instance of the walker based on the walker name.
*
@ -287,6 +319,9 @@ public class GenomeAnalysisEngine {
if ( isGATKLite() && GATKLiteUtils.isAvailableOnlyInFullGATK(walkerName) ) {
e = new UserException.NotSupportedInGATKLite("the " + walkerName + " walker is available only in the full version of the GATK");
}
else if ( isDeprecatedWalker(walkerName) ) {
e = new UserException.DeprecatedWalker(walkerName, getDeprecatedMajorVersionNumber(walkerName));
}
throw e;
}
}
@ -762,6 +797,14 @@ public class GenomeAnalysisEngine {
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
if (argCollection.keepProgramRecords)
removeProgramRecords = false;
return new SAMDataSource(
samReaderIDs,
threadAllocation,
@ -778,7 +821,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(),
refReader,
getBaseRecalibration(),
argCollection.defaultBaseQualities);
argCollection.defaultBaseQualities,
removeProgramRecords);
}
/**

View File

@ -249,6 +249,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
public boolean removeProgramRecords = false;
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
public boolean keepProgramRecords = false;
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;

View File

@ -89,6 +89,11 @@ public class SAMDataSource {
*/
private final SAMFileReader.ValidationStringency validationStringency;
/**
* Do we want to remove the program records from this data source?
*/
private final boolean removeProgramRecords;
/**
* Store BAM indices for each reader present.
*/
@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1);
(byte) -1,
false);
}
/**
@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) {
byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null;
validationStringency = strictness;
this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize);
else {
@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers);
availableResources.add(readers);
}
@ -777,9 +785,11 @@ public class SAMDataSource {
/**
* Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load.
* TODO: validationStringency is not used here
* @param validationStringency validation stringency.
* @param removeProgramRecords indicate whether to clear program records from the readers
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}

View File

@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
if (stub.getGenerateMD5())
factory.setCreateMd5File(true);
// Adjust max records in RAM.
// TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
if(stub.getMaxRecordsInRam() != null)
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());

View File

@ -86,7 +86,7 @@ public class GATKRunReport {
private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
// number of milliseconds before the S3 put operation is timed-out:
private static final long S3PutTimeOut = 30 * 1000;
private static final long S3PutTimeOut = 10 * 1000;
/**

View File

@ -89,9 +89,9 @@ public class GATKReport {
reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
} catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file);
throw new UserException.CouldNotReadInputFile(file, e);
}

View File

@ -32,6 +32,7 @@ import java.util.List;
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)

View File

@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 8/2/12
* Time: 1:58 PM
* To change this template use File | Settings | File Templates.
*/
import java.lang.annotation.*;
/**
* Indicates that program records should be removed from SAM headers by default for this walker
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface RemoveProgramRecords {
}

View File

@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Gatherer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import java.io.File;
import java.io.FileNotFoundException;
@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer {
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original");
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal");
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
generalReport.output(outputFile);

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.collections.Pair;
@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -58,7 +62,7 @@ import java.util.ArrayList;
* This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating
* only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative
* of poor base quality. This walker generates tables based on various user-specified covariates (such as read group,
* reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical
* reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical
* probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations.
* The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score).
* <p>
@ -90,7 +94,7 @@ import java.util.ArrayList;
* <h2>Examples</h2>
* <pre>
* java -Xmx4g -jar GenomeAnalysisTK.jar \
* -T BaseQualityScoreRecalibrator \
* -T BaseRecalibrator \
* -I my_reads.bam \
* -R resources/Homo_sapiens_assembly18.fasta \
* -knownSites bundle/hg18/dbsnp_132.hg18.vcf \
@ -109,7 +113,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private RecalibrationTables recalibrationTables;
@ -143,12 +147,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
if (RAC.LIST_ONLY) {
RecalDataManager.listAvailableCovariates(logger);
RecalUtils.listAvailableCovariates(logger);
System.exit(0);
}
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
@ -222,17 +226,17 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
if (readNotSeen(read)) {
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
RecalDataManager.parsePlatformForRead(read, RAC);
if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
RecalUtils.parsePlatformForRead(read, RAC);
if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
continue;
}
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
}
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
RecalDataManager.isColorSpaceConsistent(read, offset))
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
RecalUtils.isColorSpaceConsistent(read, offset))
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
}
countedSites++;
@ -271,13 +275,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
public void onTraversalDone(Long result) {
logger.info("Calculating quantized quality scores...");
quantizeQualityScores();
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
if (!RAC.NO_PLOTS) {
logger.info("Generating recalibration plots...");
generatePlots();
}
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
logger.info("Processed: " + result + " sites");
}
@ -285,10 +292,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
else
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
@ -309,7 +316,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
}
RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
}
}

View File

@ -1,109 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jan 6, 2010
*
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class Datum {
long numObservations; // number of bases seen in total
long numMismatches; // number of bases seen that didn't match the reference
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public Datum() {
numObservations = 0L;
numMismatches = 0L;
}
public Datum(long numObservations, long numMismatches) {
this.numObservations = numObservations;
this.numMismatches = numMismatches;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
}
synchronized void increment(final boolean isError) {
numObservations++;
numMismatches += isError ? 1:0;
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
double empiricalQualDouble() {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
byte empiricalQualByte() {
final double doubleMismatches = (double) (numMismatches);
final double doubleObservations = (double) (numObservations);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}
@Override
public boolean equals(Object o) {
if (!(o instanceof Datum))
return false;
Datum other = (Datum) o;
return numMismatches == other.numMismatches && numObservations == other.numObservations;
}
}

View File

@ -1,148 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Random;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum extends Datum {
private static final double UNINITIALIZED = -1.0;
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
numObservations = _numObservations;
numMismatches = _numMismatches;
estimatedQReported = reportedQuality;
empiricalQuality = UNINITIALIZED;
}
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.numObservations;
this.numMismatches = copy.numMismatches;
this.estimatedQReported = copy.estimatedQReported;
this.empiricalQuality = copy.empiricalQuality;
}
public void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
increment(other.numObservations, other.numMismatches);
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
empiricalQuality = UNINITIALIZED;
}
@Override
public void increment(final boolean isError) {
super.increment(isError);
empiricalQuality = UNINITIALIZED;
}
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
protected final void calcEmpiricalQuality() {
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public void setEstimatedQReported(final double estimatedQReported) {
this.estimatedQReported = estimatedQReported;
}
public final double getEstimatedQReported() {
return estimatedQReported;
}
public void setEmpiricalQuality(final double empiricalQuality) {
this.empiricalQuality = empiricalQuality;
}
public final double getEmpiricalQuality() {
if (empiricalQuality == UNINITIALIZED)
calcEmpiricalQuality();
return empiricalQuality;
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}
public String stringForCSV() {
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
}
/**
* We don't compare the estimated quality reported because it may be different when read from
* report tables.
*
* @param o the other recal datum
* @return true if the two recal datums have the same number of observations, errors and empirical quality.
*/
@Override
public boolean equals(Object o) {
if (!(o instanceof RecalDatum))
return false;
RecalDatum other = (RecalDatum) o;
return super.equals(o) &&
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
}
}

View File

@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File;
import java.util.Collections;
@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
* reads which have had the reference inserted because of color space inconsistencies.
*/
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
/**
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
* their color space tag can not be recalibrated.
*/
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
/**
* The context covariate will use a context of this size to calculate it's covariate value for base mismatches
@ -177,41 +178,41 @@ public class RecalibrationArgumentCollection {
public GATKReportTable generateReportTable() {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
argumentsTable.addColumn("Argument");
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addRowID("covariate", true);
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
argumentsTable.addRowID("no_standard_covs", true);
argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.addRowID("run_without_dbsnp", true);
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.addRowID("solid_recal_mode", true);
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.addRowID("solid_nocall_strategy", true);
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.addRowID("mismatches_context_size", true);
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("indels_context_size", true);
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true);
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.addRowID("low_quality_tail", true);
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.addRowID("default_platform", true);
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.addRowID("force_platform", true);
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -47,7 +47,10 @@ import java.util.List;
* <p>
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
* Note that if there are multiple variants at a site, it takes the first one seen.
* Several important notes:
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
* 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
*
* <h2>Input</h2>
@ -102,7 +105,7 @@ public class FastaAlternateReference extends FastaReference {
String refBase = String.valueOf((char)ref.getBase());
// Check to see if we have a called snp
for ( VariantContext vc : tracker.getValues(variants) ) {
for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
if ( vc.isFiltered() )
continue;

View File

@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model {
/** The default model with the best performance in all cases */
EXACT,
POOL
EXACT
}
protected int N;

View File

@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model {
SNP,
INDEL,
BOTH,
POOLSNP,
POOLINDEL,
POOLBOTH
GeneralPloidySNP,
GeneralPloidyINDEL,
BOTH
}
public enum GENOTYPING_MODE {

View File

@ -82,7 +82,7 @@ import java.util.*;
* -o snps.raw.vcf \
* -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \
* -dcov [50] \
* -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
* [-L targets.interval_list]
* </pre>
*
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
} else {
// in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
}
*/
// get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear();

View File

@ -50,6 +50,7 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
private static final String GPSTRING = "GeneralPloidy";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
@ -273,7 +274,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -640,6 +641,9 @@ public class UnifiedGenotyperEngine {
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
modelPrefix = GPSTRING + modelPrefix;
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
@ -648,17 +652,13 @@ public class UnifiedGenotyperEngine {
if ( vcInput.isSNP() ) {
// ignore SNPs if the user chose INDEL mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
models.add(UAC.GLmodel);
}
}
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
// ignore INDELs if the user chose SNP mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
models.add(UAC.GLmodel);
}
// No support for other types yet
}
@ -668,7 +668,7 @@ public class UnifiedGenotyperEngine {
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
}
else {
models.add(UAC.GLmodel);
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
}
}
@ -730,12 +730,19 @@ public class UnifiedGenotyperEngine {
}
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
// user-specified name
String afModelName = UAC.AFmodel.name();
if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
afModelName = GPSTRING + afModelName;
for (int i = 0; i < afClasses.size(); i++) {
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
if (afModelName.equalsIgnoreCase(key)) {
try {
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);

View File

@ -73,19 +73,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
@Output(required=false, doc="Output bam")
protected StingSAMFileWriter writer = null;
/**
* If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool
* may run out of memory. We recommend that you additionally tell Java to use a temp directory with plenty of available
* space (by setting java.io.tempdir on the command-line).
*/
@Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false)
protected int MAX_RECORDS_IN_RAM = 500000;
public void initialize() {
// set up the output writer
if ( writer != null )
writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM);
}
public void initialize() {}
private void emit(final SAMRecord read) {
if ( writer != null )

View File

@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
// for ( String sample : samplesToPhase )
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
// logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);

View File

@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
return true;
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
// first subset to the samples
VariantContext subContext = vc.subContextFromSamples(samples, true);
VariantContext subContext = vc.subContextFromSamples(samples);
// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?

View File

@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
if ( samples == null || samples.isEmpty() )
return true;
VariantContext subContext = vc.subContextFromSamples(samples, false);
VariantContext subContext = vc.subContextFromSamples(samples);
if ( subContext.isPolymorphicInSamples() ) {
return true;
}

View File

@ -500,7 +500,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
@Requires({"eval != null", "comp != null"})
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
// find all of the matching comps
if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
// if either of these are NO_VARIATION they are LENIENT matches
return EvalCompMatchType.LENIENT;
if ( comp.getType() != eval.getType() )
return EvalCompMatchType.NO_MATCH;

View File

@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
}
}
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc1 != null) updateTiTv(vc1, false);
if (vc2 != null) updateTiTv(vc2, true);
@Override
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null)
updateTiTv(eval, false);
if (comp != null)
updateTiTv(comp, true);
}
@Override

View File

@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
for ( final VariantContext c : knownComps ) {
// loop over sites, looking for something that matches the type eval
if ( eval.getType() == c.getType() ) {
if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
return KNOWN_STATES;
}
}

View File

@ -197,7 +197,9 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
// if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
}
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
@ -262,12 +264,8 @@ public class VariantEvalUtils {
// First, filter the VariantContext to represent only the samples for evaluation
VariantContext vcsub = vc;
if (subsetBySample && vc.hasGenotypes()) {
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
else
vcsub = ensureAnnotations(vc, vc);
}
if (subsetBySample && vc.hasGenotypes())
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
if ((byFilter || !vcsub.isFiltered())) {
addMapping(mapping, VariantEval.getAllSampleName(), vcsub);

View File

@ -58,14 +58,11 @@ import java.util.*;
* to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
* slightly lower quality level.
*
* <p>
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
*
* <h2>Input</h2>
* <p>
* The input raw variants to be recalibrated.
* <p>
* The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
* The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
* <p>
* The tranches file that was generated by the VariantRecalibrator walker.
*
@ -82,6 +79,7 @@ import java.util.*;
* --ts_filter_level 99.0 \
* -tranchesFile path/to/output.tranches \
* -recalFile path/to/output.recal \
* -mode SNP \
* -o path/to/output.recalibrated.filtered.vcf
* </pre>
*
@ -198,7 +196,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
for( final VariantContext vc : VCs ) {
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
if( recalDatum == null ) {

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -273,11 +274,37 @@ public class VariantDataManager {
}
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
}
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
switch( trainVC.getType() ) {
case SNP:
case MNP:
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP );
case INDEL:
case MIXED:
case SYMBOLIC:
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
default:
return false;
}
}
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) {
switch( mode ) {
case SNP:
return evalVC.isSNP() || evalVC.isMNP();
case INDEL:
return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
case BOTH:
return true;
default:
throw new ReviewedStingException( "Encountered unknown recal mode: " + mode );
}
}
public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) {
// we need to sort in coordinate order in order to produce a valid VCF
Collections.sort( data, new Comparator<VariantDatum>() {

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.FileNotFoundException;
@ -65,7 +63,7 @@ import java.util.*;
* The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set.
* One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call.
* The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship
* between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic
* between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the the probability that a SNP is a true genetic
* variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided
* as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive
* error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the
@ -73,15 +71,9 @@ import java.util.*;
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
*
* <p>
* NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
*
* <p>
* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
*
* <p>
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
*
* <h2>Input</h2>
* <p>
* The input raw variants to be recalibrated.
@ -90,7 +82,7 @@ import java.util.*;
*
* <h2>Output</h2>
* <p>
* A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
* A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
* <p>
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
*
@ -102,8 +94,9 @@ import java.util.*;
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
* -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
* -mode SNP \
* -recalFile path/to/output.recal \
* -tranchesFile path/to/output.tranches \
* -rscriptFile path/to/output.plots.R
@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Advanced
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
protected Boolean TRUST_ALL_POLYMORPHIC = false;
//@Hidden
//@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
//protected Boolean PERFORM_PROJECT_CONSENSUS = false;
/////////////////////////////
// Private Member Variables
@ -255,7 +245,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
if( VariantDataManager.checkVariationClass( vc, VRAC.MODE ) ) {
final VariantDatum datum = new VariantDatum();
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
@ -268,10 +258,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
double priorFactor = QualityUtils.qualToProb( datum.prior );
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
//}
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
mapList.add( datum );
@ -282,12 +268,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
return mapList;
}
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
(mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && (vc.isIndel() || vc.isMixed()));
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce

View File

@ -69,9 +69,14 @@ public class QualityUtils {
* @return a probability (0.0 - 1.0)
*/
static private double qualToErrorProbRaw(int qual) {
return qualToErrorProb((double) qual);
}
public static double qualToErrorProb(final double qual) {
return Math.pow(10.0, ((double) qual)/-10.0);
}
static public double qualToErrorProb(byte qual) {
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
}

View File

@ -168,6 +168,28 @@ public class PluginManager<PluginType> {
String pluginName = getName(pluginClass);
pluginsByName.put(pluginName, pluginClass);
}
// sort the plugins so the order of elements is deterministic
sortPlugins(plugins);
sortPlugins(interfaces);
}
/**
* Sorts, in place, the list of plugins according to getName() on each element
*
* @param unsortedPlugins
*/
private final void sortPlugins(final List<Class<? extends PluginType>> unsortedPlugins) {
Collections.sort(unsortedPlugins, new ComparePluginsByName());
}
private final class ComparePluginsByName implements Comparator<Class<? extends PluginType>> {
@Override
public int compare(final Class<? extends PluginType> aClass, final Class<? extends PluginType> aClass1) {
String pluginName1 = getName(aClass);
String pluginName2 = getName(aClass1);
return pluginName1.compareTo(pluginName2);
}
}
/**

View File

@ -4,7 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
private final static boolean FORBID_SYMBOLICS = false;
private final static int ALLOWED_MAJOR_VERSION = 2;
private final static int MIN_MINOR_VERSION = 1;
private BCFVersion bcfVersion = null;
private VCFHeader header = null;
/**
@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
try {
// note that this reads the magic as well, and so does double duty
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
error("Input stream does not begin with BCF2 magic");
bcfVersion = BCFVersion.readBCFVersion(inputStream);
if ( bcfVersion == null )
error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
logger.info("BCF version " + bcfVersion);
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
FileInputStream fis = null;
try {
fis = new FileInputStream(path);
return BCF2Utils.startsWithBCF2Magic(fis);
final BCFVersion version = BCFVersion.readBCFVersion(fis);
return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
} catch ( FileNotFoundException e ) {
return false;
} catch ( IOException e ) {
@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
try {
if ( fis != null ) fis.close();
} catch ( IOException e ) {
; // do nothing
// do nothing
}
}
}

View File

@ -202,7 +202,7 @@ public final class BCF2Decoder {
return null;
else {
final String s = new String(bytes, 0, goodLength);
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
}
} catch ( IOException e ) {
throw new ReviewedStingException("readByte failure", e);

View File

@ -41,8 +41,6 @@ import java.util.*;
* @since 5/12
*/
public final class BCF2Utils {
public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
public static final int OVERFLOW_ELEMENT_MARKER = 15;
@ -75,68 +73,60 @@ public final class BCF2Utils {
*/
@Requires("header != null")
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
public static ArrayList<String> makeDictionary(final VCFHeader header) {
final Set<String> seen = new HashSet<String>();
final ArrayList<String> dict = new ArrayList<String>();
boolean sawPASS = false;
// special case the special PASS field which doesn't show up in the FILTER field definitions
seen.add(VCFConstants.PASSES_FILTERS_v4);
dict.add(VCFConstants.PASSES_FILTERS_v4);
// set up the strings dictionary
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
if ( ! seen.contains(idLine.getID())) {
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
dict.add(idLine.getID());
seen.add(idLine.getID());
}
}
}
if ( ! sawPASS )
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
return dict;
}
@Requires({"nElements >= 0", "type != null"})
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
return typeByte;
}
@Ensures("result >= 0")
public final static int decodeSize(final byte typeDescriptor) {
public static int decodeSize(final byte typeDescriptor) {
return (0xF0 & typeDescriptor) >> 4;
}
@Ensures("result >= 0")
public final static int decodeTypeID(final byte typeDescriptor) {
public static int decodeTypeID(final byte typeDescriptor) {
return typeDescriptor & 0x0F;
}
@Ensures("result != null")
public final static BCF2Type decodeType(final byte typeDescriptor) {
public static BCF2Type decodeType(final byte typeDescriptor) {
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
}
public final static boolean sizeIsOverflow(final byte typeDescriptor) {
public static boolean sizeIsOverflow(final byte typeDescriptor) {
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
}
@Requires("nElements >= 0")
public final static boolean willOverflow(final long nElements) {
public static boolean willOverflow(final long nElements) {
return nElements > MAX_INLINE_ELEMENTS;
}
public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
stream.read(magicBytes);
return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
}
public final static byte readByte(final InputStream stream) {
public static byte readByte(final InputStream stream) {
// TODO -- shouldn't be capturing error here
try {
return (byte)(stream.read() & 0xFF);
@ -155,7 +145,7 @@ public final class BCF2Utils {
*/
@Requires({"strings != null", "strings.size() > 1"})
@Ensures("result != null")
public static final String collapseStringList(final List<String> strings) {
public static String collapseStringList(final List<String> strings) {
final StringBuilder b = new StringBuilder();
for ( final String s : strings ) {
if ( s != null ) {
@ -177,14 +167,14 @@ public final class BCF2Utils {
*/
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
@Ensures("result != null")
public static final List<String> exploreStringList(final String collapsed) {
public static List<String> explodeStringList(final String collapsed) {
assert isCollapsedString(collapsed);
final String[] exploded = collapsed.substring(1).split(",");
return Arrays.asList(exploded);
}
@Requires("s != null")
public static final boolean isCollapsedString(final String s) {
public static boolean isCollapsedString(final String s) {
return s.charAt(0) == ',';
}
@ -226,7 +216,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
public final static BCF2Type determineIntegerType(final int value) {
public static BCF2Type determineIntegerType(final int value) {
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
if ( potentialType.withinRange(value) )
return potentialType;
@ -236,7 +226,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
public final static BCF2Type determineIntegerType(final int[] values) {
public static BCF2Type determineIntegerType(final int[] values) {
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
BCF2Type maxType = BCF2Type.INT8;
for ( final int value : values ) {
@ -262,7 +252,7 @@ public final class BCF2Utils {
*/
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
@Ensures("result.isIntegerType()")
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
switch ( t1 ) {
case INT8: return t2;
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
@ -272,7 +262,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
public final static BCF2Type determineIntegerType(final List<Integer> values) {
public static BCF2Type determineIntegerType(final List<Integer> values) {
BCF2Type maxType = BCF2Type.INT8;
for ( final int value : values ) {
final BCF2Type type1 = determineIntegerType(value);
@ -297,7 +287,7 @@ public final class BCF2Utils {
* @param o
* @return
*/
public final static List<Object> toList(final Object o) {
public static List<Object> toList(final Object o) {
if ( o == null ) return Collections.emptyList();
else if ( o instanceof List ) return (List<Object>)o;
else return Collections.singletonList(o);
@ -305,7 +295,7 @@ public final class BCF2Utils {
@Requires({"stream != null", "bytesForEachInt > 0"})
public final static int readInt(int bytesForEachInt, final InputStream stream) {
public static int readInt(int bytesForEachInt, final InputStream stream) {
switch ( bytesForEachInt ) {
case 1: {
return (byte)(readByte(stream));
@ -323,7 +313,7 @@ public final class BCF2Utils {
}
}
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
switch ( type.getSizeInBytes() ) {
case 1:
encodeStream.write(0xFF & value);

View File

@ -0,0 +1,80 @@
package org.broadinstitute.sting.utils.codecs.bcf2;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
/**
* Simple holder for BCF version information
*
* User: depristo
* Date: 8/2/12
* Time: 2:16 PM
*/
public class BCFVersion {
/**
* BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2)
* and m is the minor version, currently 1
*/
public static final byte[] MAGIC_HEADER_START = "BCF".getBytes();
final int majorVersion;
final int minorVersion;
public BCFVersion(int majorVersion, int minorVersion) {
this.majorVersion = majorVersion;
this.minorVersion = minorVersion;
}
/**
* @return the major version number of this BCF file
*/
public int getMajorVersion() {
return majorVersion;
}
/**
* @return the minor version number of this BCF file
*/
public int getMinorVersion() {
return minorVersion;
}
/**
* Return a new BCFVersion object describing the major and minor version of the BCF file in stream
*
* Note that stream must be at the very start of the file.
*
* @param stream
* @return a BCFVersion object, or null if stream doesn't contain a BCF file
* @throws IOException
*/
public static BCFVersion readBCFVersion(final InputStream stream) throws IOException {
final byte[] magicBytes = new byte[MAGIC_HEADER_START.length];
stream.read(magicBytes);
if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) {
// we're a BCF file
final int majorByte = stream.read();
final int minorByte = stream.read();
return new BCFVersion( majorByte, minorByte );
} else
return null;
}
/**
* Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions
* @param out
* @throws IOException
*/
public void write(final OutputStream out) throws IOException {
out.write(MAGIC_HEADER_START);
out.write(getMajorVersion() & 0xFF);
out.write(getMinorVersion() & 0xFF);
}
@Override
public String toString() {
return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion());
}
}

View File

@ -316,9 +316,9 @@ public class UserException extends ReviewedStingException {
public static class MissingWalker extends UserException {
public MissingWalker(String walkerName, String message) {
super(String.format("Walker %s is not available: %s", walkerName, message));
public static class DeprecatedWalker extends UserException {
public DeprecatedWalker(String walkerName, String version) {
super(String.format("Walker %s is no longer available in the GATK; it has been deprecated since version %s", walkerName, version));
}
}

View File

@ -4,7 +4,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils.recalibration;
import net.sf.samtools.SAMTag;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
@ -103,7 +103,7 @@ public class BaseRecalibration {
}
}
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
read.setBaseQualities(null, errorModel);

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import java.util.Arrays;
import java.util.List;
@ -41,7 +39,7 @@ public class QuantizationInfo {
for (final RecalDatum value : qualTable.getAllValues()) {
final RecalDatum datum = value;
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key
qualHistogram[empiricalQual] += datum.getNumObservations(); // add the number of observations for every key
}
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
quantizeQualityScores(quantizationLevels);
@ -70,15 +68,15 @@ public class QuantizationInfo {
}
public GATKReportTable generateReportTable() {
GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME);
quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual);
quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
}
return quantizedTable;
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
/**
* The object temporarily held by a read that describes all of it's covariates.

View File

@ -0,0 +1,305 @@
package org.broadinstitute.sting.utils.recalibration;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Random;
/**
* An individual piece of recalibration data. Each bin counts up the number of observations and the number
* of reference mismatches seen for that combination of covariates.
*
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*/
@Invariant({
"estimatedQReported >= 0.0",
"! Double.isNaN(estimatedQReported)",
"! Double.isInfinite(estimatedQReported)",
"empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED",
"! Double.isNaN(empiricalQuality)",
"! Double.isInfinite(empiricalQuality)",
"numObservations >= 0",
"numMismatches >= 0",
"numMismatches <= numObservations"
})
public class RecalDatum {
private static final double UNINITIALIZED = -1.0;
/**
* estimated reported quality score based on combined data's individual q-reporteds and number of observations
*/
private double estimatedQReported;
/**
* the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
*/
private double empiricalQuality;
/**
* number of bases seen in total
*/
private long numObservations;
/**
* number of bases seen that didn't match the reference
*/
private long numMismatches;
/**
* used when calculating empirical qualities to avoid division by zero
*/
private static final int SMOOTHING_CONSTANT = 1;
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
/**
* Create a new RecalDatum with given observation and mismatch counts, and an reported quality
*
* @param _numObservations
* @param _numMismatches
* @param reportedQuality
*/
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
numObservations = _numObservations;
numMismatches = _numMismatches;
estimatedQReported = reportedQuality;
empiricalQuality = UNINITIALIZED;
}
/**
* Copy copy into this recal datum, overwriting all of this objects data
* @param copy
*/
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.getNumObservations();
this.numMismatches = copy.getNumMismatches();
this.estimatedQReported = copy.estimatedQReported;
this.empiricalQuality = copy.empiricalQuality;
}
/**
* Add in all of the data from other into this object, updating the reported quality from the expected
* error rate implied by the two reported qualities
*
* @param other
*/
public synchronized void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
increment(other.getNumObservations(), other.getNumMismatches());
estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations());
empiricalQuality = UNINITIALIZED;
}
public synchronized void setEstimatedQReported(final double estimatedQReported) {
if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0");
if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite");
if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
this.estimatedQReported = estimatedQReported;
}
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
}
public final double getEstimatedQReported() {
return estimatedQReported;
}
public final byte getEstimatedQReportedAsByte() {
return (byte)(int)(Math.round(getEstimatedQReported()));
}
//---------------------------------------------------------------------------------------------------------------
//
// Empirical quality score -- derived from the num mismatches and observations
//
//---------------------------------------------------------------------------------------------------------------
/**
* Returns the error rate (in real space) of this interval, or 0 if there are no obserations
* @return the empirical error rate ~= N errors / N obs
*/
@Ensures("result >= 0.0")
public double getEmpiricalErrorRate() {
if ( numObservations == 0 )
return 0.0;
else {
// cache the value so we don't call log over and over again
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
// smoothing is one error and one non-error observation, for example
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
return doubleMismatches / doubleObservations;
}
}
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0");
if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite");
if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN");
this.empiricalQuality = empiricalQuality;
}
public final double getEmpiricalQuality() {
if (empiricalQuality == UNINITIALIZED)
calcEmpiricalQuality();
return empiricalQuality;
}
public final byte getEmpiricalQualityAsByte() {
return (byte)(Math.round(getEmpiricalQuality()));
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
@Override
public String toString() {
return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality()));
}
public String stringForCSV() {
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
}
// /**
// * We don't compare the estimated quality reported because it may be different when read from
// * report tables.
// *
// * @param o the other recal datum
// * @return true if the two recal datums have the same number of observations, errors and empirical quality.
// */
// @Override
// public boolean equals(Object o) {
// if (!(o instanceof RecalDatum))
// return false;
// RecalDatum other = (RecalDatum) o;
// return super.equals(o) &&
// MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
// }
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
public long getNumObservations() {
return numObservations;
}
public synchronized void setNumObservations(final long numObservations) {
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
this.numObservations = numObservations;
empiricalQuality = UNINITIALIZED;
}
public long getNumMismatches() {
return numMismatches;
}
@Requires({"numMismatches >= 0"})
public synchronized void setNumMismatches(final long numMismatches) {
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
this.numMismatches = numMismatches;
empiricalQuality = UNINITIALIZED;
}
@Requires({"by >= 0"})
public synchronized void incrementNumObservations(final long by) {
numObservations += by;
empiricalQuality = UNINITIALIZED;
}
@Requires({"by >= 0"})
public synchronized void incrementNumMismatches(final long by) {
numMismatches += by;
empiricalQuality = UNINITIALIZED;
}
@Requires({"incObservations >= 0", "incMismatches >= 0"})
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
public synchronized void increment(final long incObservations, final long incMismatches) {
incrementNumObservations(incObservations);
incrementNumMismatches(incMismatches);
}
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
public synchronized void increment(final boolean isError) {
incrementNumObservations(1);
if ( isError )
incrementNumMismatches(1);
}
// -------------------------------------------------------------------------------------
//
// Private implementation helper functions
//
// -------------------------------------------------------------------------------------
/**
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
*/
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized final void calcEmpiricalQuality() {
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
/**
* calculate the expected number of errors given the estimated Q reported and the number of observations
* in this datum.
*
* @return a positive (potentially fractional) estimate of the number of errors
*/
@Ensures("result >= 0.0")
private double calcExpectedErrors() {
return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
}
}

View File

@ -0,0 +1,309 @@
package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.HashSet;
import java.util.Set;
/**
* A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one
*
* @author Mark DePristo
* @since 07/27/12
*/
public class RecalDatumNode<T extends RecalDatum> {
private final static boolean USE_CHI2 = true;
protected static Logger logger = Logger.getLogger(RecalDatumNode.class);
private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY;
private final T recalDatum;
private double fixedPenalty = UNINITIALIZED;
private final Set<RecalDatumNode<T>> subnodes;
public RecalDatumNode(final T recalDatum) {
this(recalDatum, new HashSet<RecalDatumNode<T>>());
}
@Override
public String toString() {
return recalDatum.toString();
}
public RecalDatumNode(final T recalDatum, final Set<RecalDatumNode<T>> subnodes) {
this(recalDatum, UNINITIALIZED, subnodes);
}
protected RecalDatumNode(final T recalDatum, final double fixedPenalty) {
this(recalDatum, fixedPenalty, new HashSet<RecalDatumNode<T>>());
}
protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set<RecalDatumNode<T>> subnodes) {
this.recalDatum = recalDatum;
this.fixedPenalty = fixedPenalty;
this.subnodes = new HashSet<RecalDatumNode<T>>(subnodes);
}
public T getRecalDatum() {
return recalDatum;
}
public Set<RecalDatumNode<T>> getSubnodes() {
return subnodes;
}
public double getPenalty() {
if ( fixedPenalty != UNINITIALIZED )
return fixedPenalty;
else
return calcPenalty();
}
public double calcAndSetFixedPenalty(final boolean doEntireTree) {
fixedPenalty = calcPenalty();
if ( doEntireTree )
for ( final RecalDatumNode<T> sub : subnodes )
sub.calcAndSetFixedPenalty(doEntireTree);
return fixedPenalty;
}
public void addSubnode(final RecalDatumNode<T> sub) {
subnodes.add(sub);
}
public boolean isLeaf() {
return subnodes.isEmpty();
}
public int getNumBranches() {
return subnodes.size();
}
/**
* Total penalty is the sum of leaf node penalties
*
* This algorithm assumes that penalties have been fixed before pruning, as leaf nodes by
* definition have 0 penalty unless they represent a pruned tree with underlying -- but now
* pruned -- subtrees
*
* @return
*/
public double totalPenalty() {
if ( isLeaf() )
return getPenalty();
else {
double sum = 0.0;
for ( final RecalDatumNode<T> sub : subnodes )
sum += sub.totalPenalty();
return sum;
}
}
public int maxDepth() {
int subMax = 0;
for ( final RecalDatumNode<T> sub : subnodes )
subMax = Math.max(subMax, sub.maxDepth());
return subMax + 1;
}
public int minDepth() {
if ( isLeaf() )
return 1;
else {
int subMin = Integer.MAX_VALUE;
for ( final RecalDatumNode<T> sub : subnodes )
subMin = Math.min(subMin, sub.minDepth());
return subMin + 1;
}
}
public int size() {
int size = 1;
for ( final RecalDatumNode<T> sub : subnodes )
size += sub.size();
return size;
}
public int numLeaves() {
if ( isLeaf() )
return 1;
else {
int size = 0;
for ( final RecalDatumNode<T> sub : subnodes )
size += sub.numLeaves();
return size;
}
}
private double calcPenalty() {
if ( USE_CHI2 )
return calcPenaltyChi2();
else
return calcPenaltyLog10(getRecalDatum().getEmpiricalErrorRate());
}
private double calcPenaltyChi2() {
if ( isLeaf() )
return 0.0;
else {
final long[][] counts = new long[subnodes.size()][2];
int i = 0;
for ( RecalDatumNode<T> subnode : subnodes ) {
counts[i][0] = subnode.getRecalDatum().getNumMismatches();
counts[i][1] = subnode.getRecalDatum().getNumObservations();
i++;
}
final double chi2 = new ChiSquareTestImpl().chiSquare(counts);
// StringBuilder x = new StringBuilder();
// StringBuilder y = new StringBuilder();
// for ( int k = 0; k < counts.length; k++) {
// if ( k != 0 ) {
// x.append(", ");
// y.append(", ");
// }
// x.append(counts[k][0]);
// y.append(counts[k][1]);
// }
// logger.info("x = c(" + x.toString() + ")");
// logger.info("y = c(" + y.toString() + ")");
// logger.info("chi2 = " + chi2);
return chi2;
//return Math.log10(chi2);
}
}
/**
* Calculate the penalty of this interval, given the overall error rate for the interval
*
* If the globalErrorRate is e, this value is:
*
* sum_i |log10(e_i) - log10(e)| * nObservations_i
*
* each the index i applies to all leaves of the tree accessible from this interval
* (found recursively from subnodes as necessary)
*
* @param globalErrorRate overall error rate in real space against which we calculate the penalty
* @return the cost of approximating the bins in this interval with the globalErrorRate
*/
@Requires("globalErrorRate >= 0.0")
@Ensures("result >= 0.0")
private double calcPenaltyLog10(final double globalErrorRate) {
if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty
return 0.0;
if ( isLeaf() ) {
// this is leave node
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations();
// TODO -- how we can generalize this calculation?
// if ( this.qEnd <= minInterestingQual )
// // It's free to merge up quality scores below the smallest interesting one
// return 0;
// else {
// return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations();
// }
} else {
double sum = 0;
for ( final RecalDatumNode<T> hrd : subnodes)
sum += hrd.calcPenaltyLog10(globalErrorRate);
return sum;
}
}
public RecalDatumNode<T> pruneToDepth(final int maxDepth) {
if ( maxDepth < 1 )
throw new IllegalArgumentException("maxDepth < 1");
else {
final Set<RecalDatumNode<T>> subPruned = new HashSet<RecalDatumNode<T>>(getNumBranches());
if ( maxDepth > 1 )
for ( final RecalDatumNode<T> sub : subnodes )
subPruned.add(sub.pruneToDepth(maxDepth - 1));
return new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, subPruned);
}
}
public RecalDatumNode<T> pruneByPenalty(final int maxElements) {
RecalDatumNode<T> root = this;
while ( root.size() > maxElements ) {
// remove the lowest penalty element, and continue
root = root.removeLowestPenaltyNode();
}
// our size is below the target, so we are good, return
return root;
}
/**
* Find the lowest penalty node in the tree, and return a tree without it
*
* Note this excludes the current (root) node
*
* @return
*/
private RecalDatumNode<T> removeLowestPenaltyNode() {
final Pair<RecalDatumNode<T>, Double> nodeToRemove = getMinPenaltyNode();
logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond());
final Pair<RecalDatumNode<T>, Boolean> result = removeNode(nodeToRemove.getFirst());
if ( ! result.getSecond() )
throw new IllegalStateException("Never removed any node!");
final RecalDatumNode<T> oneRemoved = result.getFirst();
if ( oneRemoved == null )
throw new IllegalStateException("Removed our root node, wow, didn't expect that");
return oneRemoved;
}
private Pair<RecalDatumNode<T>, Double> getMinPenaltyNode() {
final double myValue = isLeaf() ? Double.MAX_VALUE : getPenalty();
Pair<RecalDatumNode<T>, Double> maxNode = new Pair<RecalDatumNode<T>, Double>(this, myValue);
for ( final RecalDatumNode<T> sub : subnodes ) {
final Pair<RecalDatumNode<T>, Double> subFind = sub.getMinPenaltyNode();
if ( subFind.getSecond() < maxNode.getSecond() ) {
maxNode = subFind;
}
}
return maxNode;
}
private Pair<RecalDatumNode<T>, Boolean> removeNode(final RecalDatumNode<T> nodeToRemove) {
if ( this == nodeToRemove ) {
if ( isLeaf() )
throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + nodeToRemove);
// node is the thing we are going to remove, but without any subnodes
final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty);
return new Pair<RecalDatumNode<T>, Boolean>(node, true);
} else {
// did we remove something in a sub branch?
boolean removedSomething = false;
// our sub nodes with the penalty node removed
final Set<RecalDatumNode<T>> sub = new HashSet<RecalDatumNode<T>>(getNumBranches());
for ( final RecalDatumNode<T> sub1 : subnodes ) {
if ( removedSomething ) {
// already removed something, just add sub1 back to sub
sub.add(sub1);
} else {
// haven't removed anything yet, so try
final Pair<RecalDatumNode<T>, Boolean> maybeRemoved = sub1.removeNode(nodeToRemove);
removedSomething = maybeRemoved.getSecond();
sub.add(maybeRemoved.getFirst());
}
}
final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, sub);
return new Pair<RecalDatumNode<T>, Boolean>(node, removedSomething);
}
}
}

View File

@ -23,11 +23,13 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.covariates.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
@ -39,7 +41,6 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -59,7 +60,7 @@ import java.util.*;
* This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
*/
public class RecalDataManager {
public class RecalUtils {
public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments";
public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized";
public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0";
@ -85,13 +86,108 @@ public class RecalDataManager {
private static final String SCRIPT_FILE = "BQSR.R";
private static final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
private static final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
private static final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
private static final Pair<String, String> covariateValue = new Pair<String, String>(RecalUtils.COVARIATE_VALUE_COLUMN_NAME, "%s");
private static final Pair<String, String> covariateName = new Pair<String, String>(RecalUtils.COVARIATE_NAME_COLUMN_NAME, "%s");
private static final Pair<String, String> eventType = new Pair<String, String>(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s");
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%d");
/**
* Generates two lists : required covariates and optional covariates based on the user's requests.
*
* Performs the following tasks in order:
* 1. Adds all requierd covariates in order
* 2. Check if the user asked to use the standard covariates and adds them all if that's the case
* 3. Adds all covariates requested by the user that were not already added by the two previous steps
*
* @param argumentCollection the argument collection object for the recalibration walker
* @return a pair of ordered lists : required covariates (first) and optional covariates (second)
*/
public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
for (String requestedCovariateString : argumentCollection.COVARIATES) {
boolean foundClass = false;
for (Class<? extends Covariate> covClass : covariateClasses) {
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
foundClass = true;
if (!requiredClasses.contains(covClass) &&
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
try {
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
optionalCovariates.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
}
}
if (!foundClass) {
throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
}
}
}
return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
}
/**
* Adds the required covariates to a covariate list
*
* Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
*
* @param classes list of classes to add to the covariate list
* @return the covariate list
*/
private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
if (classes.size() != 2)
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
dest.add(new QualityScoreCovariate());
return dest;
}
/**
* Adds the standard covariates to a covariate list
*
* @param classes list of classes to add to the covariate list
* @return the covariate list
*/
private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
for (Class<?> covClass : classes) {
try {
final Covariate covariate = (Covariate) covClass.newInstance();
dest.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
return dest;
}
public static void listAvailableCovariates(Logger logger) {
// Get a list of all available covariates
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
// Print and exit if that's what was requested
logger.info("Available covariates:");
for (Class<?> covClass : covariateClasses)
logger.info(covClass.getSimpleName());
logger.info("");
}
public enum SOLID_RECAL_MODE {
@ -152,64 +248,6 @@ public class RecalDataManager {
}
}
/**
* Generates two lists : required covariates and optional covariates based on the user's requests.
*
* Performs the following tasks in order:
* 1. Adds all requierd covariates in order
* 2. Check if the user asked to use the standard covariates and adds them all if that's the case
* 3. Adds all covariates requested by the user that were not already added by the two previous steps
*
* @param argumentCollection the argument collection object for the recalibration walker
* @return a pair of ordered lists : required covariates (first) and optional covariates (second)
*/
public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified
for (String requestedCovariateString : argumentCollection.COVARIATES) {
boolean foundClass = false;
for (Class<? extends Covariate> covClass : covariateClasses) {
if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
foundClass = true;
if (!requiredClasses.contains(covClass) &&
(argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
try {
final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it
optionalCovariates.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
}
}
if (!foundClass) {
throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
}
}
}
return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
}
public static void listAvailableCovariates(Logger logger) {
// Get a list of all available covariates
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
// Print and exit if that's what was requested
logger.info("Available covariates:");
for (Class<?> covClass : covariateClasses)
logger.info(covClass.getSimpleName());
logger.info("");
}
private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
int reportTableIndex = 0;
@ -272,8 +310,8 @@ public class RecalDataManager {
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations());
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches());
rowIndex++;
}
@ -320,7 +358,7 @@ public class RecalDataManager {
files.getFirst().close();
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class));
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
executor.addArgs(csvFileName.getAbsolutePath());
executor.addArgs(plotFileName.getAbsolutePath());
executor.exec();
@ -480,14 +518,14 @@ public class RecalDataManager {
*/
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String)
colorSpace = ((String) attr).getBytes();
else
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
if (read.getReadNegativeStrandFlag())
@ -501,7 +539,7 @@ public class RecalDataManager {
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
prevBase = readBases[i];
}
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
}
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
@ -545,7 +583,7 @@ public class RecalDataManager {
* @return Returns true if the base was inconsistent with the color space
*/
public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG);
if (attr != null) {
final byte[] inconsistency = (byte[]) attr;
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
@ -691,40 +729,4 @@ public class RecalDataManager {
}
/**
* Adds the required covariates to a covariate list
*
* Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
*
* @param classes list of classes to add to the covariate list
* @return the covariate list
*/
private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
if (classes.size() != 2)
throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next.
dest.add(new QualityScoreCovariate());
return dest;
}
/**
* Adds the standard covariates to a covariate list
*
* @param classes list of classes to add to the covariate list
* @return the covariate list
*/
private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
for (Class<?> covClass : classes) {
try {
final Covariate covariate = (Covariate) covClass.newInstance();
dest.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
return dest;
}
}

View File

@ -1,11 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import java.io.File;
import java.io.PrintStream;
@ -33,13 +34,13 @@ public class RecalibrationReport {
public RecalibrationReport(final File RECAL_FILE) {
final GATKReport report = new GATKReport(RECAL_FILE);
argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
RAC = initializeArgumentCollectionTable(argumentTable);
GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
quantizationInfo = initializeQuantizationTable(quantizedTable);
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
@ -57,13 +58,13 @@ public class RecalibrationReport {
for (Covariate cov : requestedCovariates)
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE)));
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
}
@ -85,7 +86,7 @@ public class RecalibrationReport {
private int countReadGroups(final GATKReportTable reportTable) {
Set<String> readGroups = new HashSet<String>();
for ( int i = 0; i < reportTable.getNumRows(); i++ )
readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString());
readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString());
return readGroups.size();
}
@ -139,17 +140,17 @@ public class RecalibrationReport {
\ */
private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) {
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
final int covIndex = optionalCovariateIndexes.get(covName);
final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempCOVarray[3] = event.index;
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
@ -164,11 +165,11 @@ public class RecalibrationReport {
*/
private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> qualTable) {
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempQUALarray[2] = event.index;
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
@ -183,9 +184,9 @@ public class RecalibrationReport {
*/
private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> rgTable) {
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempRGarray[1] = event.index;
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
@ -193,13 +194,13 @@ public class RecalibrationReport {
}
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
final long nObservations = (Long) reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
(Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
datum.setEstimatedQReported(estimatedQReported);
@ -218,8 +219,8 @@ public class RecalibrationReport {
final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
for ( int i = 0; i < table.getNumRows(); i++ ) {
final byte originalQual = (byte)i;
final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
final byte quantizedQual = Byte.parseByte(quantizedObject.toString());
final long quantizedCount = Long.parseLong(countObject.toString());
quals[originalQual] = quantizedQual;
@ -239,7 +240,7 @@ public class RecalibrationReport {
for ( int i = 0; i < table.getNumRows(); i++ ) {
final String argument = table.get(i, "Argument").toString();
Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
if (value.equals("null"))
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
@ -250,10 +251,10 @@ public class RecalibrationReport {
RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
else if (argument.equals("solid_recal_mode"))
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value);
else if (argument.equals("solid_nocall_strategy"))
RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
else if (argument.equals("mismatches_context_size"))
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
@ -307,7 +308,7 @@ public class RecalibrationReport {
}
public void output(PrintStream output) {
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
}
public RecalibrationArgumentCollection getRAC() {

View File

@ -25,9 +25,7 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
/**

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -23,8 +23,10 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/*
@ -89,8 +91,3 @@ public interface Covariate {
public int maximumKeyValue();
}
interface RequiredCovariate extends Covariate {}
interface StandardCovariate extends Covariate {}
interface ExperimentalCovariate extends Covariate {}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@ -0,0 +1,30 @@
package org.broadinstitute.sting.utils.recalibration.covariates;
/**
* [Short one sentence description of this walker]
* <p/>
* <p>
* [Functionality of this walker]
* </p>
* <p/>
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
public interface ExperimentalCovariate extends Covariate {}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -0,0 +1,30 @@
package org.broadinstitute.sting.utils.recalibration.covariates;
/**
* [Short one sentence description of this walker]
* <p/>
* <p>
* [Functionality of this walker]
* </p>
* <p/>
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
public interface RequiredCovariate extends Covariate {}

View File

@ -0,0 +1,30 @@
package org.broadinstitute.sting.utils.recalibration.covariates;
/**
* [Short one sentence description of this walker]
* <p/>
* <p>
* [Functionality of this walker]
* </p>
* <p/>
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
public interface StandardCovariate extends Covariate {}

View File

@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.recalibration.EventType;
import java.util.ArrayList;
import java.util.Arrays;
@ -476,7 +476,6 @@ public class AlignmentUtils {
}
break;
case D:
case N:
if (!isDeletion) {
alignmentPos += elementLength;
} else {
@ -498,6 +497,7 @@ public class AlignmentUtils {
break;
case H:
case P:
case N:
break;
default:
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
@ -516,16 +516,13 @@ public class AlignmentUtils {
final int elementLength = ce.getLength();
switch (ce.getOperator()) {
case I:
case S:
break;
case D:
case N:
alignmentLength += elementLength;
break;
case M:
alignmentLength += elementLength;
break;
case I:
case S:
case H:
case P:
break;

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

View File

@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
* in this VC is returned as the set of alleles in the subContext, even if
* some of those alleles aren't in the samples
*
* WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING?
*
* @param sampleNames the sample names
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default
* @return new VariantContext subsetting to just the given samples
*/
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( sampleNames.containsAll(getSampleNames()) ) {
if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
return this; // fast path when you don't have any work to do
} else {
VariantContextBuilder builder = new VariantContextBuilder(this);
@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
}
/**
* @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true
*
* @param sampleNames
* @return
*/
public VariantContext subContextFromSamples(final Set<String> sampleNames) {
return subContextFromSamples(sampleNames, true);
}
public VariantContext subContextFromSample(String sampleName) {
return subContextFromSamples(Collections.singleton(sampleName), true);
return subContextFromSamples(Collections.singleton(sampleName));
}
/**

View File

@ -31,6 +31,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -83,6 +84,9 @@ import java.util.*;
* @since 06/12
*/
class BCF2Writer extends IndexingVariantContextWriter {
public static final int MAJOR_VERSION = 2;
public static final int MINOR_VERSION = 1;
/**
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
* is found in the input VC. This can be very dangerous as the genotype encoding
@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
writer.close();
final byte[] headerBytes = capture.toByteArray();
outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
outputStream.write(headerBytes);
} catch (IOException e) {

View File

@ -282,12 +282,12 @@ public abstract class BaseTest {
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
Assert.assertTrue(actual instanceof Double);
Assert.assertTrue(actual instanceof Double, "Not a double");
assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
}
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
Assert.assertTrue(actual instanceof Double);
Assert.assertTrue(actual instanceof Double, "Not a double");
assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
}
@ -303,13 +303,13 @@ public abstract class BaseTest {
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
Assert.assertTrue(Double.isNaN(actual));
Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
Assert.assertTrue(Double.isInfinite(actual));
Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not");
else {
final double delta = Math.abs(actual - expected);
final double ratio = Math.abs(actual / expected - 1.0);
Assert.assertTrue(delta < tolerance || ratio < tolerance);
Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance);
}
}
}

View File

@ -24,9 +24,12 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMProgramRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.AfterMethod;
@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest {
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
}
}
/** Test that we clear program records when requested */
@Test
public void testRemoveProgramRecords() {
logger.warn("Executing testRemoveProgramRecords");
// setup the data
readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
// use defaults
SAMDataSource data = new SAMDataSource(readers,
new ThreadAllocation(),
null,
genomeLocParser,
false,
SAMFileReader.ValidationStringency.SILENT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
false);
List<SAMProgramRecord> defaultProgramRecords = data.getHeader().getProgramRecords();
assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
boolean removeProgramRecords = false;
data = new SAMDataSource(readers,
new ThreadAllocation(),
null,
genomeLocParser,
false,
SAMFileReader.ValidationStringency.SILENT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
false,
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1,
removeProgramRecords);
List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
removeProgramRecords = true;
data = new SAMDataSource(readers,
new ThreadAllocation(),
null,
genomeLocParser,
false,
SAMFileReader.ValidationStringency.SILENT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
false,
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1,
removeProgramRecords);
List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
}
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -33,15 +34,15 @@ public class BQSRGathererUnitTest {
for (GATKReportTable originalTable : originalReport.getTables()) {
GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
List<String> columnsToTest = new LinkedList<String>();
columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL
columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL
columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1);
}
else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) {
columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) {
columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
}

View File

@ -38,13 +38,17 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.util.*;
public class ArtificialReadPileupTestProvider {
final String refBases = "ACAGAGCTGACCCTCCCTCCCCTCTCCCAGTGCAACAGCACGGGCGGCGACTGCTTTTACCGAGGCTACACGTCAGGCGTGGCGGCTGTCCAGGACTGGTACCACTTCCACTATGTGGATCTCTGCTGAGGACCAGGAAAGCCAGCACCCGCAGAGACTCTTCCCCAGTGCTCCATACGATCACCATTCTCTGCAGAAGGTCAGACGTCACTGGTGGCCCCCCAGCCTCCTCAGCAGGGAAGGATACTGTCCCGCAGATGAGATGAGCGAGAGCCGCCAGACCCACGTGACGCTGCACGACATCGACCCTCAGGCCTTGGACCAGCTGGTGCAGTTTGCCTACACGGCTGAGATTGTGGTGGGCGAGGGC";
final int contigStart = 1;
final int contigStop = 10;
final int contigStop = refBases.length();
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
final String artificialContig = "chr1";
@ -54,16 +58,18 @@ public class ArtificialReadPileupTestProvider {
final int artificialMappingQuality = 60;
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
List<SAMReadGroupRecord> sampleRGs;
final String refBases = "AGGATACTGT";
List<String> sampleNames = new ArrayList<String>();
private String sampleName(int i) { return sampleNames.get(i); }
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
public final int offset = 5;
public final int locStart = 105; // start position where we desire artificial variant
private final int readLength = 10; // desired read length in pileup
public final int readOffset = 4;
private final int readStart = locStart - readOffset;
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,locStart,locStart);
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,locStart-100,locStart+100);
public final String windowBases = refBases.substring(locStart-100-1,locStart+100);
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,windowBases.getBytes());
byte BASE_QUAL = 50;
@ -96,25 +102,28 @@ public class ArtificialReadPileupTestProvider {
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
return getAlignmentContextFromAlleles(eventLength, altBases, numReadsPerAllele, false, BASE_QUAL);
}
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele,
boolean addBaseErrors, int phredScaledBaseErrorRate) {
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(final int eventLength,
final String altBases,
final int[] numReadsPerAllele,
final boolean addBaseErrors,
final int phredScaledBaseErrorRate) {
final String refChar = new String(new byte[]{referenceContext.getBase()});
String refAllele, altAllele;
if (eventLength == 0) {
// SNP case
refAllele = new String(new byte[]{referenceContext.getBase()});
altAllele = new String(altBases.substring(0,1));
refAllele = refChar;
altAllele = altBases.substring(0,1);
} else if (eventLength>0){
// insertion
refAllele = "";
altAllele = altBases.substring(0,eventLength);
refAllele = refChar;
altAllele = refChar+altBases/*.substring(0,eventLength)*/;
}
else {
// deletion
refAllele = refBases.substring(offset,offset+Math.abs(eventLength));
altAllele = "";
refAllele = new String(referenceContext.getForwardBases()).substring(0,Math.abs(eventLength)+1);
altAllele = refChar;
}
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
@ -138,22 +147,21 @@ public class ArtificialReadPileupTestProvider {
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int offset = (contigStop-contigStart+1)/2;
int refAlleleLength = refAllele.length();
final int refAlleleLength = refAllele.length();
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
return new ReadBackedPileupImpl(loc,pileupElements);
}
private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
int alleleLength = allele.length();
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int readCounter = 0;
for ( int d = 0; d < numReadsPerAllele; d++ ) {
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
byte[] readBases = trueHaplotype(allele, refAlleleLength, readLength);
if (addErrors)
addBaseErrors(readBases, phredScaledErrorRate);
@ -165,20 +173,20 @@ public class ArtificialReadPileupTestProvider {
read.setReadBases(readBases);
read.setReadName(artificialReadName+readCounter++);
boolean isBeforeDeletion = false, isBeforeInsertion = false;
boolean isBeforeDeletion = alleleLength<refAlleleLength;
boolean isBeforeInsertion = alleleLength>refAlleleLength;
int eventLength = alleleLength - refAlleleLength;
if (isReference)
read.setCigarString(readBases.length + "M");
else {
isBeforeDeletion = alleleLength<refAlleleLength;
isBeforeInsertion = alleleLength>refAlleleLength;
if (isBeforeDeletion || isBeforeInsertion)
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
(readBases.length-offset)+"M");
read.setCigarString((readOffset+1)+"M"+ Math.abs(eventLength) + (isBeforeDeletion?"D":"I") +
(readBases.length-readOffset)+"M");
else // SNP case
read.setCigarString(readBases.length+"M");
}
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
read.setReadPairedFlag(false);
read.setAlignmentStart(readStart);
read.setMappingQuality(artificialMappingQuality);
@ -187,16 +195,25 @@ public class ArtificialReadPileupTestProvider {
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
pileupElements.add(new PileupElement(read,readOffset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases,Math.abs(eventLength)));
}
return pileupElements;
}
private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) {
/**
* Create haplotype with desired allele and reference context
* @param allele Desired allele string
* @param refAlleleLength Length of reference allele.
* @param desiredLength Desired haplotype length
* @return String with haplotype formed by (prefix)+allele bases + postfix
*/
private byte[] trueHaplotype(final String allele, final int refAlleleLength, final int desiredLength) {
// create haplotype based on a particular allele
String prefix = refBases.substring(0, offset);
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
final int startIdx= locStart - readOffset-1;
final String prefix = refBases.substring(startIdx, locStart-1);
final String postfix = refBases.substring(locStart+refAlleleLength-1,startIdx + desiredLength);
return (prefix+allele+postfix).getBytes();
}

View File

@ -45,7 +45,6 @@ import org.testng.annotations.Test;
*/
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
final String refBases = "AGGATACTGT";
final int nSamples = 1;
final int[] numReadsPerAllele = new int[]{10,10};
final String SAMPLE_PREFIX = "sample";
@ -65,7 +64,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
@Test
public void testBasicConsensusCounts() {
// 4 inserted bases, min cnt = 10
String altBases = "CCTCCTGAGA";
String altBases = "CCTC";
int eventLength = 4;
List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
@ -73,13 +72,11 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
//altBases = "CCTCMTGAGA";
// test deletions
eventLength = 3;
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
Assert.assertEquals(alleles.get(0).getBaseString().substring(1,eventLength), new String(pileupProvider.getReferenceContext().getForwardBases()).substring(1,eventLength));
// same with min Reads = 11
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
@ -92,14 +89,14 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
Assert.assertEquals(alleles.size(),0);
// test N's in insertions
altBases = "CCTCNTGAGA";
altBases = "CCTC";
eventLength = 4;
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
Assert.assertEquals(alleles.get(1).getBaseString().substring(1,eventLength+1), altBases);
altBases = "CCTCNTGAGA";
altBases = "CCTCN";
eventLength = 5;
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);

View File

@ -373,13 +373,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
//
// testing SnpEff
// testing MinIndelFraction
//
// --------------------------------------------------------------------------------------------------------------
final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
+ "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
@Test
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -403,4 +403,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
executeTest("test minIndelFraction 1.0", spec);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing Ns in CIGAR
//
// --------------------------------------------------------------------------------------------------------------
@Test
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
Arrays.asList("22c9fd65ce3298bd7fbf400c9c209f29"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
}

View File

@ -34,7 +34,7 @@ import java.util.Arrays;
import java.util.List;
public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}

View File

@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
}
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"62f81e7d2082fbc71cae0101c27fefad", // tranches
"b9709e4180e56abc691b208bd3e8626c", // recal file
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
"f360ce3eb2b0b887301be917a9843e2b", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {

View File

@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
final String collapsed = BCF2Utils.collapseStringList(strings);
Assert.assertEquals(collapsed, expected);
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings);
}
// -----------------------------------------------------------------

View File

@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest {
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab"));
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
}
@ -77,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
String testVCF = privateTestDir + "ex2.vcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f"));
executeTest("Test writing samtools WEx BCF example", spec1);
}

View File

@ -1,5 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.covariates.ContextCovariate;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.covariates.*;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -41,7 +43,7 @@ public class ReadCovariatesUnitTest {
requestedCovariates[2] = coCov;
requestedCovariates[3] = cyCov;
ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
// check that the length is correct
Assert.assertEquals(rc.getMismatchesKeySet().length, length);

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.covariates.ReadGroupCovariate;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.covariates.*;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -72,7 +73,7 @@ public class RecalibrationReportUnitTest {
final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
int nKeys = 0; // keep track of how many keys were produced
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);

View File

@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
public void run(final VariantContext vc) {
if ( samples == null )
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
VariantContext sub = vc.subContextFromSamples(samples, true);
VariantContext sub = vc.subContextFromSamples(samples);
sub.getNSamples();
}
};

View File

@ -347,6 +347,7 @@ class DataProcessingPipeline extends QScript {
this.knownSites ++= qscript.dbSNP
this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
this.input_file :+= inBam
this.disable_indel_quals = true
this.out = outRecalFile
if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
@ -359,7 +360,6 @@ class DataProcessingPipeline extends QScript {
case class recal (inBam: File, inRecalFile: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
this.input_file :+= inBam
this.BQSR = inRecalFile
this.disable_indel_quals = true
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
this.out = outBam
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)

View File

@ -166,6 +166,7 @@ class PacbioProcessingPipeline extends QScript {
this.knownSites :+= dbSNP
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
this.input_file :+= inBam
this.disable_indel_quals = true
this.out = outRecalFile
this.analysisName = queueLogDir + outRecalFile + ".covariates"
this.jobName = queueLogDir + outRecalFile + ".covariates"
@ -178,7 +179,6 @@ class PacbioProcessingPipeline extends QScript {
this.input_file :+= inBam
this.BQSR = inRecalFile
this.out = outBam
this.disable_indel_quals = true
this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration"

View File

@ -41,7 +41,7 @@ class DataProcessingPipelineTest {
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf",
" -test ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "0f0b32e94640a8d548b4c1134ad4c075"
spec.fileMD5s += testOut -> "60d39ae909fdd049920b54e0965b6d3c"
PipelineTest.executeTest(spec)
}
@ -60,7 +60,7 @@ class DataProcessingPipelineTest {
" -bwa /home/unix/carneiro/bin/bwa",
" -bwape ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "6b4f13d22b45d7d617ee959fdc278ed2"
spec.fileMD5s += testOut -> "61ca3237afdfabf78ee27a5bb80dae59"
PipelineTest.executeTest(spec)
}

Some files were not shown because too many files have changed in this diff Show More