Merge remote-tracking branch 'unstable/master'

This commit is contained in:
Eric Banks 2012-08-20 21:18:54 -04:00
commit 5b1781fdac
272 changed files with 5955 additions and 3968 deletions

View File

@ -76,7 +76,8 @@
<property name="staging.dir" value="staging" />
<property name="default.executable" value="none" />
<!-- Javadoc/Scaladoc directories -->
<!-- GATKDocs/Javadoc/Scaladoc directories -->
<property name="gatkdocs.dir" value="gatkdocs" />
<property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" />
@ -92,12 +93,10 @@
<!-- To disable for test targets, run with -Duse.contracts=false -->
<!-- To enable for non-test targets, run with -Duse.contracts=true -->
<property name="java.contracts.dir" value="${build.dir}/java/contracts" />
<property name="contracts.version" value="1.0-20110609" />
<property name="contracts.version" value="1.0-r139" />
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
<property name="contract.dump.dir" value="dump" />
<property name="gatkdocs.dir" value="gatkdocs" />
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
<property name="halt" value="yes" />
@ -208,19 +207,19 @@
<include name="**/*.java" />
</fileset>
<pathconvert property="external.build.dir">
<path path="${java.classes}"/>
</pathconvert>
<path id="external.build.dir">
<path path="${java.classes}" />
</path>
<pathconvert property="external.dist.dir">
<path id="external.dist.dir">
<path path="${dist.dir}" />
</pathconvert>
</path>
<!-- GATK dependencies consist of 3rd party plugins plus compiled GATK classes -->
<pathconvert property="external.gatk.classpath">
<path id="external.gatk.classpath">
<path path="${java.classes}"/>
<path refid="external.dependencies" />
</pathconvert>
</path>
<!-- the path for resources that need to go into the GATK jar;
any additional resources should go into this set. -->
@ -430,9 +429,9 @@
<target name="gatk.compile.external.source" depends="gatk.compile.internal.source" if="include.external">
<subant target="compile" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" />
<property name="dist.dir" value="${external.dist.dir}" />
<property name="gatk.classpath" value="${external.gatk.classpath}" />
<property name="build.dir" refid="external.build.dir" />
<property name="dist.dir" refid="external.dist.dir" />
<property name="gatk.classpath" refid="external.gatk.classpath" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant>
</target>
@ -680,9 +679,9 @@
</jar>
<subant target="dist" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" />
<property name="dist.dir" value="${external.dist.dir}" />
<property name="gatk.classpath" value="${external.gatk.classpath}" />
<property name="build.dir" refid="external.build.dir" />
<property name="dist.dir" refid="external.dist.dir" />
<property name="gatk.classpath" refid="external.gatk.classpath" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant>
</target>

View File

@ -52,7 +52,7 @@
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
<!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
<!-- Dependencies for the html walker documentation -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
@ -87,7 +87,7 @@
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
<!-- Contracts for Java and dependencies -->
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-20110609"/>
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
<dependency org="asm" name="asm-all" rev="3.3.1"/>
<!-- POI, for reading pipeline files -->

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {

View File

@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;

View File

@ -1,6 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
/**
* Created by IntelliJ IDEA.
@ -30,24 +35,26 @@ public class ErrorModel {
private static final boolean compressRange = false;
private static final double log10MinusE = Math.log10(Math.exp(1.0));
private static final boolean DEBUG = false;
/**
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
*
* @param minQualityScore Minimum site quality score to evaluate
* @param maxQualityScore Maximum site quality score to evaluate
* @param phredScaledPrior Prior for site quality
* @param UAC Argument Collection
* @param refSamplePileup Reference sample pileup
* @param refSampleVC VC with True alleles in reference sample pileup
* @param minPower Minimum power
*/
public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior,
ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) {
this.maxQualityScore = maxQualityScore;
this.minQualityScore = minQualityScore;
this.phredScaledPrior = phredScaledPrior;
log10minPower = Math.log10(minPower);
public ErrorModel (final UnifiedArgumentCollection UAC,
final ReadBackedPileup refSamplePileup,
VariantContext refSampleVC, final ReferenceContext refContext) {
this.maxQualityScore = UAC.maxQualityScore;
this.minQualityScore = UAC.minQualityScore;
this.phredScaledPrior = UAC.phredScaledPrior;
log10minPower = Math.log10(UAC.minPower);
PairHMMIndelErrorModel pairModel = null;
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
double[][] perReadLikelihoods = null;
double[] model = new double[maxQualityScore+1];
Arrays.fill(model,Double.NEGATIVE_INFINITY);
@ -61,11 +68,17 @@ public class ErrorModel {
break;
}
}
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
if (refSampleVC.isIndel()) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
}
}
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
// maximum uncertainty if there's no ref data at site
model[q] = p;
@ -75,23 +88,48 @@ public class ErrorModel {
else {
hasData = true;
int matches = 0;
int coverage = refSamplePileup.getNumberOfElements();
int coverage = 0;
Allele refAllele = refSampleVC.getReference();
if (refSampleVC.isIndel()) {
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
if (!haplotypeMap.isEmpty())
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
}
int idx = 0;
for (PileupElement refPileupElement : refSamplePileup) {
if (DEBUG)
System.out.println(refPileupElement.toString());
boolean isMatch = false;
for (Allele allele : refSampleVC.getAlleles())
isMatch |= pileupElementMatches(refPileupElement, allele, refAllele);
for (Allele allele : refSampleVC.getAlleles()) {
boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase());
if (DEBUG) System.out.println(m);
isMatch |= m;
}
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
if (!isInformativeElement(perAlleleLikelihoods))
matches++;
else
matches += (isMatch?1:0);
matches += (isMatch?1:0);
// System.out.format("MATCH:%b\n",isMatch);
} else {
matches += (isMatch?1:0);
}
coverage++;
}
int mismatches = coverage - matches;
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
if (coverage==0)
model[q] = p;
else
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
}
this.refDepth = coverage;
}
@ -101,6 +139,17 @@ public class ErrorModel {
}
@Requires("likelihoods.length>0")
private boolean isInformativeElement(double[] likelihoods) {
// if likelihoods are the same, they're not informative
final double thresh = 0.1;
int maxIdx = MathUtils.maxElementIndex(likelihoods);
int minIdx = MathUtils.minElementIndex(likelihoods);
if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh)
return false;
else
return true;
}
/**
* Simple constructor that just takes a given log-probability vector as error model.
* Only intended for unit testing, not general usage.
@ -115,23 +164,27 @@ public class ErrorModel {
}
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) {
/* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) {
if (DEBUG)
System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
*/
//pileupElement.
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup counts as a mismatch
if (allele.isReference()) {
// for a ref allele, any base mismatch or new indel is a mismatch.
if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case
return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]);
if(allele.getBases().length>0)
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
// could clean up/simplify this when unpadding is removed
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
else
// either null allele to compare, or ref/alt lengths are different (indel by definition).
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
}
// for non-ref alleles to compare:
if (refAllele.getBases().length == allele.getBases().length)
// alleles have the same length (eg snp or mnp)
return pileupElement.getBase() == allele.getBases()[0];
@ -209,18 +262,19 @@ public class ErrorModel {
}
public String toString() {
String result = "(";
StringBuilder result = new StringBuilder("(");
boolean skipComma = true;
for (double v : probabilityVector.getProbabilityVector()) {
if (skipComma) {
skipComma = false;
}
else {
result += ",";
result.append(",");
}
result += String.format("%.4f", v);
result.append(String.format("%.4f", v));
}
return result + ")";
result.append(")");
return result.toString();
}
public static int getTotalReferenceDepth(HashMap<String, ErrorModel> perLaneErrorModels) {

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC;
@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false;
protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
ploidy = UAC.samplePloidy;
this.UAC = UAC;
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0)
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
return newPool;
}
// todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
// todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
/**
*
* @param set ExactACset holding conformation to be computed
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
continue;
PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
}
@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next();
}
PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone();
while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
// for current conformation, get all possible ways to break vector K into two components G1 and G2
final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
while (innerIterator.hasNext()) {
// check if breaking current conformation into g1 and g2 is feasible.
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( numOriginalAltAlleles == numNewAltAlleles) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// find the genotype with maximum likelihoods
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
@ -681,8 +681,8 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
}
// per-pool logging of AC and AF
gb.attribute(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
gb.attribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
// remove PLs if necessary
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import java.util.*;
public abstract class PoolGenotypeLikelihoods {
public abstract class GeneralPloidyGenotypeLikelihoods {
protected final int numChromosomes;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {
private static final boolean FAST_GL_COMPUTATION = true;
// constructor with given logPL elements
public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
this.alleles = alleles;
this.nAlleles = alleles.size();
numChromosomes = ploidy;
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
} else {
if (logLikelihoods.length != likelihoodDim)
throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
}
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
final int numAlleles = currentState.length;
final int ploidy = restrictSumTo;
linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
}
else
throw new ReviewedStingException("BUG: Not supported");
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
double[] newPLs = new double[newPLSize];
@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
newCount[idx] = pVec[permutationKey[idx]];
// get corresponding index from new count
int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
newPLs[outputIdx] = pl;
if (VERBOSE) {
System.out.println("Old Key:"+Arrays.toString(pVec));

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
//protected Set<String> laneIDs;
public enum Model {
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
final protected UnifiedArgumentCollection UAC;
protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC,logger);
this.UAC = UAC;
@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
referenceSampleVC.getAlleles())
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
.make();
}
@ -138,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
protected static class PoolGenotypeData {
public final String name;
public final PoolGenotypeLikelihoods GL;
public final GeneralPloidyGenotypeLikelihoods GL;
public final int depth;
public final List<Allele> alleles;
public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
this.name = name;
this.GL = GL;
this.depth = depth;
@ -237,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
// create the GenotypeLikelihoods object
final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
// actually compute likelihoods
final int nGoodBases = GL.add(pileup, UAC);
if ( nGoodBases > 0 )
@ -247,7 +246,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
// find the alternate allele(s) that we should be using
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
if (alleles == null || alleles.isEmpty())
return null;
// start making the VariantContext
final GenomeLoc loc = ref.getLocus();
final int endLoc = getEndLocation(tracker, ref, alleles);
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array
final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
allAlleles, alleles);
// normalize in log space so that max element is zero.
@ -313,7 +313,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
refLanePileup = refPileup.getPileupForLane(laneID);
//ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles);
perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower));
perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref));
}
return perLaneErrorModels;
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
Abstract methods - must be implemented in derived classes
*/
protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,

View File

@ -18,26 +18,30 @@ import java.util.*;
* Time: 10:06 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
final PairHMMIndelErrorModel pairModel;
final LinkedHashMap<Allele, Haplotype> haplotypeMap;
final ReferenceContext refContext;
final int eventLength;
double[][] readHaplotypeLikelihoods;
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
final byte refBase;
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.pairModel = pairModel;
this.haplotypeMap = haplotypeMap;
this.refContext = referenceContext;
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
// todo - not needed if indel alleles have base at current position
this.refBase = referenceContext.getBase();
}
// -------------------------------------------------------------------------------------
@ -138,10 +142,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
if (!hasReferenceSampleData) {
final int numHaplotypes = haplotypeMap.size();
final int readCounts[] = new int[pileup.getNumberOfElements()];
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
n = readHaplotypeLikelihoods.length;
} else {
Allele refAllele = null;
@ -161,7 +163,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
int idx =0;
for (Allele allele : alleles) {
int cnt = numSeenBases.get(idx);
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0));
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0));
}
n++;

View File

@ -32,17 +32,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded = false;
/*
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
@ -60,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
*/
protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
super(UAC, logger);
@ -70,20 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation){
return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
@ -94,12 +85,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
final List<Allele> allAllelesToUse){
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
List<Allele> alleles = pair.first;
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
allelesArePadded = pair.second;
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
haplotypeMap.clear();
@ -131,6 +120,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
protected int getEndLocation(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final List<Allele> allelesToUse) {
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded);
return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
}
}

View File

@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -22,7 +23,7 @@ import static java.lang.Math.pow;
* and posteriors given a pile of bases and quality scores
*
*/
public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
final List<Allele> myAlleles;
final int[] alleleIndices;
@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
* @param useBQAedPileup Use BAQed pileup
* @param ignoreLaneInformation If true, lane info is ignored
*/
public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.useBAQedPileup = useBQAedPileup;
myAlleles = new ArrayList<Allele>(alleles);
refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref!
Allele refAllele = alleles.get(0);
//sanity check: by construction, first allele should ALWAYS be the reference alleles
if (!refAllele.isReference())
throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
if (myAlleles.size() < BaseUtils.BASES.length) {
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,

View File

@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation) {
return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,

View File

@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -57,8 +56,12 @@ public class GenotypingEngine {
// This function is the streamlined approach, currently not being used
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) {
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser ) {
// Prepare the list of haplotype indices to genotype
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
@ -184,8 +187,13 @@ public class GenotypingEngine {
}
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) {
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final ArrayList<VariantContext> activeAllelesToGenotype ) {
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
@ -220,7 +228,6 @@ public class GenotypingEngine {
}
}
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
@ -317,7 +324,7 @@ public class GenotypingEngine {
}
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
final int MAX_SIZE_TO_COMBINE = 10;
final int MAX_SIZE_TO_COMBINE = 15;
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
if( startPosKeySet.size() <= 1 ) { return; }
@ -334,10 +341,10 @@ public class GenotypingEngine {
boolean isBiallelic = true;
VariantContext thisVC = null;
VariantContext nextVC = null;
int x11 = 0;
int x12 = 0;
int x21 = 0;
int x22 = 0;
double x11 = Double.NEGATIVE_INFINITY;
double x12 = Double.NEGATIVE_INFINITY;
double x21 = Double.NEGATIVE_INFINITY;
double x22 = Double.NEGATIVE_INFINITY;
for( final Haplotype h : haplotypes ) {
// only make complex substitutions out of consecutive biallelic sites
@ -360,21 +367,24 @@ public class GenotypingEngine {
}
}
// count up the co-occurrences of the events for the R^2 calculation
// BUGBUG: use haplotype likelihoods per-sample to make this more accurate
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11++; }
else { x12++; }
} else {
if( nextHapVC == null ) { x21++; }
else { x22++; }
final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
haplotypeList.add(h);
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
} else {
if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); }
else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); }
}
}
}
if( thisVC == null || nextVC == null ) {
continue;
//throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart);
}
if( isBiallelic ) {
final double R2 = calculateR2LD( x11, x12, x21, x22 );
final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
if( DEBUG ) {
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
System.out.println("-- " + thisVC);
@ -419,24 +429,21 @@ public class GenotypingEngine {
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart();
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
byte[] refBases = new byte[]{};
byte[] altBases = new byte[]{};
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
int locus;
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte);
}
if( nextVC.hasReferenceBaseForIndel() ) {
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
}
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0;
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
}
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
@ -445,12 +452,11 @@ public class GenotypingEngine {
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
}
@Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"})
protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) {
final int total = x11 + x12 + x21 + x22;
final double pa1b1 = ((double) x11) / ((double) total);
final double pa1b2 = ((double) x12) / ((double) total);
final double pa2b1 = ((double) x21) / ((double) total);
protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) {
final double total = x11 + x12 + x21 + x22;
final double pa1b1 = x11 / total;
final double pa1b2 = x12 / total;
final double pa2b1 = x21 / total;
final double pa1 = pa1b1 + pa1b2;
final double pb1 = pa1b1 + pa2b1;
return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );
@ -530,35 +536,37 @@ public class GenotypingEngine {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength );
boolean allN = true;
for( final byte b : insertionBases ) {
if( b != (byte) 'N' ) {
allN = false;
break;
}
{
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) ) {
insertionAlleles.add( Allele.create(refByte, true) );
}
if( !allN ) {
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else {
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
if( BaseUtils.isAllRegularBases(insertionBases) ) {
insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
}
}
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
alignmentPos += elementLength;
break;
}
case S:
{
alignmentPos += elementLength;
break;
}
case D:
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength );
{
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen?
@ -568,13 +576,20 @@ public class GenotypingEngine {
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
//} else {
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
deletionAlleles.add( Allele.create(refByte, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
}
//}
refPos += elementLength;
break;
}
case M:
case EQ:
case X:
{
int numSinceMismatch = -1;
int stopOfMismatch = -1;
int startOfMismatch = -1;
@ -597,11 +612,13 @@ public class GenotypingEngine {
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refBases, true ) );
snpAlleles.add( Allele.create( mismatchBases, false ) );
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refBases, true ) );
snpAlleles.add( Allele.create( mismatchBases, false ) );
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
}
numSinceMismatch = -1;
stopOfMismatch = -1;
startOfMismatch = -1;
@ -611,6 +628,7 @@ public class GenotypingEngine {
alignmentPos++;
}
break;
}
case N:
case H:
case P:

View File

@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -56,6 +58,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -103,7 +106,7 @@ import java.util.*;
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.LOCUS)
@ActiveRegionExtension(extension=65, maxRegion=275)
@ActiveRegionExtension(extension=65, maxRegion=300)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
@ -126,9 +129,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1;
@Advanced
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
@Advanced
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
@ -139,9 +144,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
@Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false)
protected boolean USE_EXPANDED_TRIGGER_SET = false;
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
protected boolean USE_ALLELES_TRIGGER = false;
@ -188,7 +190,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
protected String[] annotationClassesToUse = { "Standard" };
@ArgumentCollection
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
// the calculation arguments
private UnifiedGenotyperEngine UG_engine = null;
@ -239,12 +241,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_CALLING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING);
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
// initialize the output VCF header
@ -308,8 +310,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
public boolean wantsNonPrimaryReads() { return true; }
@Override
@Ensures({"result >= 0.0", "result <= 1.0"})
public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
@ -318,56 +320,52 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
}
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
return 1.0;
return new ActivityProfileResult(1.0);
}
}
if( USE_ALLELES_TRIGGER ) {
return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
}
if( context == null ) { return 0.0; }
if( context == null ) { return new ActivityProfileResult(0.0); }
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
noCall.add(Allele.NO_CALL);
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
for( final String sample : splitContexts.keySet() ) {
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
for( final Map.Entry<String, AlignmentContext> sample : splitContexts.entrySet() ) {
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
Arrays.fill(genotypeLikelihoods, 0.0);
for( final PileupElement p : splitContexts.get(sample).getBasePileup() ) {
final byte qual = ( USE_EXPANDED_TRIGGER_SET ?
( p.isNextToSoftClip() || p.isBeforeInsertion() || p.isAfterInsertion() ? ( p.getQual() > QualityUtils.MIN_USABLE_Q_SCORE ? p.getQual() : (byte) 20 ) : p.getQual() )
: p.getQual() );
if( p.isDeletion() || qual > (USE_EXPANDED_TRIGGER_SET ? QualityUtils.MIN_USABLE_Q_SCORE : (byte) 18) ) {
for( final PileupElement p : sample.getValue().getBasePileup() ) {
final byte qual = p.getQual();
if( p.isDeletion() || qual > (byte) 18) {
int AA = 0; final int AB = 1; int BB = 2;
if( USE_EXPANDED_TRIGGER_SET ) {
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ||
(!p.getRead().getNGSPlatform().equals(NGSPlatform.SOLID) && ((p.getRead().getReadPairedFlag() && p.getRead().getMateUnmappedFlag()) || BadMateFilter.hasBadMate(p.getRead()))) ) {
AA = 2;
BB = 0;
}
} else {
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2;
BB = 0;
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2;
BB = 0;
if( p.isNextToSoftClip() ) {
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
}
}
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
genotypeLikelihoods[AB] += MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
genotypeLikelihoods[BB] += QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
}
}
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
final ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
}
//---------------------------------------------------------------------------------------------------------------
@ -416,45 +414,48 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() )
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() )
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
// add some custom annotations to the calls
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
// Calculate the number of variants on the haplotype
int maxNumVar = 0;
for( final Allele allele : callResult.getFirst().getAlleles() ) {
if( !allele.isReference() ) {
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
final int numVar = haplotype.getEventMap().size();
if( numVar > maxNumVar ) { maxNumVar = numVar; }
if( !GENOTYPE_FULL_ACTIVE_REGION ) {
// add some custom annotations to the calls
// Calculate the number of variants on the haplotype
int maxNumVar = 0;
for( final Allele allele : callResult.getFirst().getAlleles() ) {
if( !allele.isReference() ) {
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
final int numVar = haplotype.getEventMap().size();
if( numVar > maxNumVar ) { maxNumVar = numVar; }
}
}
}
}
// Calculate the event length
int maxLength = 0;
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
final int length = a.length() - annotatedCall.getReference().length();
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
}
// Calculate the event length
int maxLength = 0;
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
final int length = a.length() - annotatedCall.getReference().length();
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
}
myAttributes.put("NVH", maxNumVar);
myAttributes.put("NumHapEval", bestHaplotypes.size());
myAttributes.put("NumHapAssembly", haplotypes.size());
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
myAttributes.put("EVENTLENGTH", maxLength);
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
myAttributes.put("extType", annotatedCall.getType().toString() );
myAttributes.put("NVH", maxNumVar);
myAttributes.put("NumHapEval", bestHaplotypes.size());
myAttributes.put("NumHapAssembly", haplotypes.size());
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
myAttributes.put("EVENTLENGTH", maxLength);
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
myAttributes.put("extType", annotatedCall.getType().toString() );
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
//}
if( annotatedCall.hasAttribute("QD") ) {
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
//}
if( annotatedCall.hasAttribute("QD") ) {
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
}
}
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
@ -493,7 +494,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
//---------------------------------------------------------------------------------------------------------------
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); }
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
activeRegion.clearReads();
@ -522,7 +523,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
readsToRemove.add(rec);
}
}

View File

@ -82,11 +82,11 @@ public class KBestPaths {
}
}
protected static class PathComparatorLowestEdge implements Comparator<Path> {
public int compare(final Path path1, final Path path2) {
return path2.lowestEdge - path1.lowestEdge;
}
}
//protected static class PathComparatorLowestEdge implements Comparator<Path> {
// public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge;
// }
//}
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@ -99,7 +99,7 @@ public class KBestPaths {
}
}
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
Collections.sort(bestPaths, new PathComparatorTotalScore() );
Collections.reverse(bestPaths);
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
}
@ -114,8 +114,8 @@ public class KBestPaths {
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); }
Collections.sort(bestPaths, new PathComparatorTotalScore() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
}
bestPaths.add(path);
} else if( n.val > 10000) {

View File

@ -30,6 +30,7 @@ import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -50,18 +51,17 @@ public class LikelihoodCalculationEngine {
}
public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
final int numHaplotypes = haplotypes.size();
int X_METRIC_LENGTH = 0;
for( final String sample : perSampleReadList.keySet() ) {
for( final GATKSAMRecord read : perSampleReadList.get(sample) ) {
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
for( final GATKSAMRecord read : sample.getValue() ) {
final int readLength = read.getReadLength();
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
}
}
int Y_METRIC_LENGTH = 0;
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
final int haplotypeLength = haplotypes.get(jjj).getBases().length;
for( final Haplotype h : haplotypes ) {
final int haplotypeLength = h.getBases().length;
if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; }
}
@ -90,8 +90,10 @@ public class LikelihoodCalculationEngine {
final int numHaplotypes = haplotypes.size();
final int numReads = reads.size();
final double[][] readLikelihoods = new double[numHaplotypes][numReads];
final int[][] readCounts = new int[numHaplotypes][numReads];
for( int iii = 0; iii < numReads; iii++ ) {
final GATKSAMRecord read = reads.get(iii);
final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
final byte[] overallGCP = new byte[read.getReadLength()];
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
@ -103,7 +105,7 @@ public class LikelihoodCalculationEngine {
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
}
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
@ -114,10 +116,11 @@ public class LikelihoodCalculationEngine {
readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
readQuals, readInsQuals, readDelQuals, overallGCP,
haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
readCounts[jjj][iii] = readCount;
}
}
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj] );
haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] );
}
}
@ -142,10 +145,20 @@ public class LikelihoodCalculationEngine {
}
return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
}
// This function takes just a single sample and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
final TreeSet<String> sampleSet = new TreeSet<String>();
sampleSet.add(sample);
return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
}
// This function takes a set of samples to pool over and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
final int numHaplotypes = haplotypeMapping.size();
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
@ -154,17 +167,21 @@ public class LikelihoodCalculationEngine {
}
// compute the diploid haplotype likelihoods
// todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
double haplotypeLikelihood = 0.0;
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
for( final String sample : samples ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
final int[] readCounts_iii = iii_mapped.getReadCounts(sample);
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
}
}
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
}
@ -176,48 +193,6 @@ public class LikelihoodCalculationEngine {
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
}
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final Set<String> samples ) {
// set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
final int numHaplotypes = haplotypeMapping.size();
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
for( int iii = 0; iii < numHaplotypes; iii++ ) {
Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
}
// compute the diploid haplotype likelihoods
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
double haplotypeLikelihood = 0.0;
for( final String sample : samples ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
}
}
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
}
}
}
}
// normalize the diploid likelihoods matrix
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
}
@Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
@Ensures({"result.length == result[0].length", "result.length == likelihoodMatrix.length"})
protected static double[][] normalizeDiploidLikelihoodMatrixFromLog10( final double[][] likelihoodMatrix ) {
@ -306,12 +281,19 @@ public class LikelihoodCalculationEngine {
final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add(0); // always start with the reference haplotype
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( haplotypes, sampleKeySet ); // all samples pooled together
// set up the default 1-to-1 haplotype mapping object
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together
int hap1 = 0;
int hap2 = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
@ -343,9 +325,9 @@ public class LikelihoodCalculationEngine {
public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
for( final String sample : perSampleReadList.keySet() ) {
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
final ArrayList<GATKSAMRecord> readsForThisSample = perSampleReadList.get(sample);
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
@ -355,7 +337,7 @@ public class LikelihoodCalculationEngine {
for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
double maxLikelihood = Double.NEGATIVE_INFINITY;
for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
final double likelihood = h.getReadLikelihoods(sample)[iii];
final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
if( likelihood > maxLikelihood ) {
maxLikelihood = likelihood;
}
@ -390,13 +372,13 @@ public class LikelihoodCalculationEngine {
readList = new ArrayList<GATKSAMRecord>();
alleleReadMap.put(Allele.NO_CALL, readList);
}
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample) ) {
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
readList.add(read);
}
}
returnMap.put(sample, alleleReadMap);
returnMap.put(sample.getKey(), alleleReadMap);
}
return returnMap;
}

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -68,7 +69,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
}
private void createDeBruijnGraphs( final ArrayList<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
graphs.clear();
// create the graph
@ -161,7 +162,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
}
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final ArrayList<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Collection<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
final byte[] refSequence = refHaplotype.getBases();
if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
@ -183,6 +184,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final GATKSAMRecord read : reads ) {
final byte[] sequence = read.getReadBases();
final byte[] qualities = read.getBaseQualities();
final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not readuced
if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) {
final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
@ -194,6 +196,14 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
break;
}
}
int countNumber = 1;
if (read.isReducedRead()) {
// compute mean number of reduced read counts in current kmer span
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
// precise rounding can make a difference with low consensus counts
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
}
if( !badKmer ) {
// get the kmers
final byte[] kmer1 = new byte[KMER_LENGTH];
@ -201,7 +211,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final byte[] kmer2 = new byte[KMER_LENGTH];
System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);
addKmersToGraph(graph, kmer1, kmer2, false);
for (int k=0; k < countNumber; k++)
addKmersToGraph(graph, kmer1, kmer2, false);
}
}
}
@ -230,7 +241,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return true;
}
private void printGraphs() {
protected void printGraphs() {
int count = 0;
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
GRAPH_WRITER.println("digraph kmer" + count++ +" {");
@ -281,7 +292,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
@ -311,7 +322,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart;
if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );

View File

@ -50,20 +50,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
};
}
@ -74,10 +75,11 @@ public class BQSRIntegrationTest extends WalkerTest {
Arrays.asList(params.md5));
executeTest("testBQSR-"+params.args, spec).getFirst();
WalkerTestSpec specNT2 = new WalkerTestSpec(
params.getCommandLine() + " -nt 2",
Arrays.asList(params.md5));
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
// TODO -- re-enable once parallelization is fixed in BaseRecalibrator
//WalkerTestSpec specNT2 = new WalkerTestSpec(
// params.getCommandLine() + " -nt 2",
// Arrays.asList(params.md5));
//executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
}
@Test
@ -94,6 +96,20 @@ public class BQSRIntegrationTest extends WalkerTest {
executeTest("testBQSRFailWithoutDBSNP", spec);
}
@Test
public void testBQSRFailWithSolidNoCall() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
" -T BaseRecalibrator" +
" -R " + b36KGReference +
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
" -L 1:50,000-80,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.class);
executeTest("testBQSRFailWithSolidNoCall", spec);
}
private static class PRTest {
final String args;
final String md5;

View File

@ -19,7 +19,7 @@ import java.util.Arrays;
* Time: 7:44 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolAFCalculationModelUnitTest extends BaseTest {
public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors
PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));

View File

@ -41,7 +41,7 @@ import java.io.PrintStream;
import java.util.*;
public class PoolGenotypeLikelihoodsUnitTest {
public class GeneralPloidyGenotypeLikelihoodsUnitTest {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
@ -49,11 +49,18 @@ public class PoolGenotypeLikelihoodsUnitTest {
private static final boolean SIMULATE_NOISY_PILEUP = false;
private static final int NUM_SIMULATED_OBS = 10;
void PoolGenotypeLikelihoodsUnitTest() {
UAC.minQualityScore = 5;
UAC.maxQualityScore = 40;
UAC.phredScaledPrior = (byte)20;
UAC.minPower = 0.0;
}
@Test
public void testStoringLikelihoodElements() {
// basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
// basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
int ploidy = 20;
int numAlleles = 4;
@ -71,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int k=0; k < gls.length; k++)
gls[k]= (double)k;
PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
double[] glnew = gl.getLikelihoods();
Assert.assertEquals(gls, glnew);
@ -83,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int ploidy = 2; ploidy < 10; ploidy++) {
for (int nAlleles = 2; nAlleles < 10; nAlleles++)
Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
}
@ -95,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create iterator, compare linear index given by iterator with closed form function
int numAlleles = 4;
int ploidy = 2;
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
while(iterator.hasNext()) {
System.out.format("\n%d:",iterator.getLinearIndex());
@ -104,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
System.out.format("%d ",aa);
int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
System.out.format("Computed idx = %d\n",computedIdx);
iterator.next();
}
@ -133,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
allelesToSubset.add(Allele.create("A",false));
allelesToSubset.add(Allele.create("C",false));
double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
originalAlleles, allelesToSubset);
@ -163,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testIndexIterator() {
int[] seed = new int[]{1,2,3,4};
PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
@ -221,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
while(iterator.hasNext()) {
int[] a = iterator.getCurrentVector();
int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
if (VERBOSE) {
System.out.format("%d:",iterator.getLinearIndex());
for (int i=0; i < seed.length; i++)
@ -251,8 +258,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte minQ = 5;
final byte maxQ = 40;
final byte refByte = refPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
@ -270,7 +275,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0);
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -287,19 +292,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testIndelErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte minQ = 5;
final byte maxQ = 40;
final byte refByte = refPileupTestProvider.getRefByte();
final String altBases = "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List<Allele> trueAlleles = new ArrayList<Allele>();
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
trueAlleles.add(Allele.create("TC", false));
trueAlleles.add(Allele.create(refByte, true));
trueAlleles.add(Allele.create((char)refByte + "TC", false));
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
final int[] matchArray = {95, 995, 9995, 10000};
@ -311,9 +314,9 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise
// CASE 1: Test HET insertion
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0);
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -329,12 +332,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create deletion VC
final int delLength = 4;
final List<Allele> delAlleles = new ArrayList<Allele>();
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
delAlleles.add(Allele.create(refByte, false));
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
for (int matches: matchArray) {
for (int mismatches: mismatchArray) {
@ -343,7 +346,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0);
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -388,8 +391,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte refByte = readPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
final Set<String> laneIDs = new TreeSet<String>();
@ -407,17 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (String laneID : laneIDs)
noisyErrorModels.put(laneID, Q30ErrorModel);
// all first ref allele
allAlleles.add(Allele.create(refByte,true));
for (byte b: BaseUtils.BASES) {
if (refByte == b)
allAlleles.add(Allele.create(b,true));
else
if (refByte != b)
allAlleles.add(Allele.create(b, false));
}
final int refIdx = 0;
int altIdx = -1;
for (int k=0; k < allAlleles.size(); k++)
if (altByte == allAlleles.get(k).getBases()[0]) {
altIdx = k;
break;
}
PrintStream out = null;
if (SIMULATE_NOISY_PILEUP) {
try {
out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
out = new PrintStream(new File("GLUnitTest.table"));
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
}
catch (Exception e) {}
@ -441,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
if (VERBOSE) {
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
@ -470,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
mlPair = noisyGL.getMostLikelyACCount();

View File

@ -12,50 +12,66 @@ import org.testng.annotations.Test;
* Time: 11:28 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolCallerIntegrationTest extends WalkerTest {
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
final static String REF = b37KGReference;
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
final String REFSAMPLE_NAME = "NA12878";
final String MTINTERVALS = "MT";
final String MTINTERVALS = "MT:1-3000";
final String LSVINTERVALS = "20:40,000,000-41,000,000";
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
private void PC_MT_Test(String bam, String args, String name, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
@Test
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93");
}
@Test
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b");
}
@Test
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459");
}
@Test
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7");
}
}

View File

@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + ref + SNP = MNP with ref base gap
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion = MNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + deletion
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + insertion (abutting)
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// complex + complex
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
}
/**

View File

@ -20,17 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "882ece8391cfec30ab658970a487d078");
HCTest(CEUTRIO_BAM, "", "6b30c7e1b6bbe80d180d9d67441cec12");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "6f458e04251bc4c09b5b544d46f19d68");
HCTest(NA12878_BAM, "", "4cdfbfeadef00725974828310558d7d4");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "c172791007f867bf3b975d4194564d9e");
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "6183fb6e374976d7087150009685e043");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -41,8 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "94186811016b332a58c150df556278f8");
HCTestComplexVariants(CEUTRIO_BAM, "", "ab7593a7a60a2e9a66053572f1718df1");
}
}

View File

@ -95,9 +95,10 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
for( int iii = 1; iii <= 3; iii++) {
Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) );
int readCount = 1;
if( readLikelihood != null ) {
Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() );
haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood});
haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}, new int[]{readCount});
haplotypes.add(haplotype);
}
}

View File

@ -7,6 +7,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
*/
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -18,6 +20,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
@ -143,6 +146,44 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
Assert.assertTrue(graphEquals(graph, expectedGraph));
}
@Test(enabled=false)
// not ready yet
public void testBasicGraphCreation() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte refBase = refPileupTestProvider.getReferenceContext().getBase();
final String altBase = (refBase==(byte)'A'?"C":"A");
final int matches = 50;
final int mismatches = 50;
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30);
PrintStream graphWriter = null;
try{
graphWriter = new PrintStream("du.txt");
} catch (Exception e) {}
SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter);
final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases());
refHaplotype.setIsReference(true);
assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype);
/* // clean up the graphs by pruning and merging
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR );
//eliminateNonRefPaths( graph );
SimpleDeBruijnAssembler.mergeNodes( graph );
}
*/
if( graphWriter != null ) {
assembler.printGraphs();
}
int k=2;
// find the best paths in the graphs
// return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
}
@Test(enabled = true)
public void testEliminateNonRefPaths() {
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);

View File

@ -1,8 +1,18 @@
library("ggplot2")
library(gplots)
library("reshape")
library("grid")
library("tools") #For compactPDF in R 2.13+
library(gsalib)
args <- commandArgs(TRUE)
if ( interactive() ) {
args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA)
} else {
args <- commandArgs(TRUE)
}
data <- read.csv(args[1])
gsa.report <- gsa.read.gatkreport(args[2])
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
numRG = length(unique(data$ReadGroup))
@ -82,20 +92,45 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
p <- ggplot(d, aes(x=CovariateValue)) +
xlab(paste(cov,"Covariate")) +
ylab("Number of Observations") +
ylab("No. of Observations (area normalized)") +
blankTheme
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
scale_y_continuous(formatter="comma")
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity")
d <- d + scale_fill_manual(values=c("maroon1","blue"))
d <- d + facet_grid(.~EventType)
# d <- d + scale_y_continuous(formatter="comma")
}
}
pdf(args[2],height=9,width=15)
if ( ! is.na(args[3]) )
pdf(args[3],height=9,width=15)
#frame()
textplot(gsa.report$Arguments, show.rownames=F)
title(
main="GATK BaseRecalibration report",
sub=date())
distributeGraphRows(list(a,b,c), c(1,1,1))
distributeGraphRows(list(d,e,f), c(1,1,1))
dev.off()
# format the overall information
rt0 <- data.frame(
ReadGroup = gsa.report$RecalTable0$ReadGroup,
EventType = gsa.report$RecalTable0$EventType,
EmpiricalQuality = sprintf("%.1f", gsa.report$RecalTable0$EmpiricalQuality),
EstimatedQReported = sprintf("%.1f", gsa.report$RecalTable0$EstimatedQReported),
Observations = sprintf("%.2e", gsa.report$RecalTable0$Observations),
Errors = sprintf("%.2e", gsa.report$RecalTable0$Errors))
textplot(t(rt0), show.colnames=F)
title("Overall error rates by event type")
if (exists('compactPDF')) {
compactPDF(args[2])
# plot per quality score recalibration table
textplot(gsa.report$RecalTable1, show.rownames=F)
title("Error rates by event type and initial quality score")
if ( ! is.na(args[3]) ) {
dev.off()
if (exists('compactPDF')) {
compactPDF(args[2])
}
}

View File

@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
if ( requestedStrat == "Sample" ) {
perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)")
} else { # by AlleleCount
perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")

View File

@ -208,6 +208,7 @@ public class FastaSequenceIndexBuilder {
break;
}
}
in.close();
return sequenceIndex;
}
catch (IOException e) {

View File

@ -12,10 +12,10 @@ import java.util.*;
*/
public class Bases implements Iterable<Byte>
{
public static byte A = 'A';
public static byte C = 'C';
public static byte G = 'G';
public static byte T = 'T';
public static final byte A = 'A';
public static final byte C = 'C';
public static final byte G = 'G';
public static final byte T = 'T';
public static final Bases instance = new Bases();

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.lang.annotation.Annotation;
import java.util.List;
@ -147,6 +148,9 @@ public class ArgumentDefinition {
this.exclusiveOf = exclusiveOf;
this.validation = validation;
this.validOptions = validOptions;
validateName(shortName);
validateName(fullName);
}
/**
@ -192,6 +196,9 @@ public class ArgumentDefinition {
else
shortName = null;
validateName(shortName);
validateName(fullName);
this.ioType = ioType;
this.argumentType = argumentType;
this.fullName = fullName;
@ -277,4 +284,14 @@ public class ArgumentDefinition {
String validation = (String)CommandLineUtils.getValue(annotation, "validation");
return validation.trim().length() > 0 ? validation.trim() : null;
}
    /**
     * Make sure the argument's name is valid.
     *
     * Names beginning with a dash are rejected because dashes introduce arguments
     * on the command line, so such a name could never be matched by the parser.
     * A null name is accepted (short/full names are optional).
     *
     * @param name the short or full argument name to validate; may be null
     * @throws ReviewedStingException if the name begins with a "-"
     */
    private void validateName(final String name) {
        if ( name != null && name.startsWith("-") )
            throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -");
    }
}

View File

@ -55,10 +55,8 @@ public class ArgumentDefinitionGroup implements Iterable<ArgumentDefinition> {
* Does the name of this argument group match the name of another?
*/
public boolean groupNameMatches( ArgumentDefinitionGroup other ) {
if( this.groupName == null && other.groupName == null )
return true;
if( this.groupName == null && other.groupName != null )
return false;
if( this.groupName == null )
return other.groupName == null;
return this.groupName.equals(other.groupName);
}

View File

@ -53,7 +53,7 @@ public abstract class ArgumentTypeDescriptor {
/**
* our log, which we want to capture anything from org.broadinstitute.sting
*/
protected static Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
protected static final Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
/**
* Fetch the given descriptor from the descriptor repository.

View File

@ -120,8 +120,8 @@ public abstract class ParsingMethod {
*/
private static final String TAG_TEXT = "[\\w\\-\\.\\=]*";
public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
public static final ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.FullNameDefinitionMatcher) {};
public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
public static final ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.ShortNameDefinitionMatcher) {};
}

View File

@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
if ( getArgumentCollection().gatkKeyFile == null ) {
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
"for more information and instructions on how to obtain a key.");
"Please see " + GATKRunReport.PHONE_HOME_DOCS_URL +
" for more information and instructions on how to obtain a key.");
}
else {
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();

View File

@ -130,6 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
}

View File

@ -233,10 +233,6 @@ public class GenomeAnalysisEngine {
if (args.nonDeterministicRandomSeed)
resetRandomGenerator(System.currentTimeMillis());
// TODO -- REMOVE ME WHEN WE STOP BCF testing
if ( args.USE_SLOW_GENOTYPES )
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
if (args.BQSR_RECAL_FILE != null)
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
@ -797,6 +793,14 @@ public class GenomeAnalysisEngine {
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
if (argCollection.keepProgramRecords)
removeProgramRecords = false;
return new SAMDataSource(
samReaderIDs,
threadAllocation,
@ -813,7 +817,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(),
refReader,
getBaseRecalibration(),
argCollection.defaultBaseQualities);
argCollection.defaultBaseQualities,
removeProgramRecords);
}
/**
@ -840,20 +845,9 @@ public class GenomeAnalysisEngine {
SAMSequenceDictionary sequenceDictionary,
GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
VCFHeader header = null;
if ( getArguments().repairVCFHeader != null ) {
try {
final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader));
header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue();
pbs.close();
} catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
}
}
final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType);
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType);
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
builder,

View File

@ -57,8 +57,6 @@ public class GATKArgumentCollection {
public GATKArgumentCollection() {
}
public Map<String, String> walkerArgs = new HashMap<String, String>();
// parameters and their defaults
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>();
@ -66,10 +64,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public File gatkKeyFile = null;
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
@ -249,6 +247,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
public boolean removeProgramRecords = false;
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
public boolean keepProgramRecords = false;
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;
@ -375,19 +379,5 @@ public class GATKArgumentCollection {
@Hidden
public boolean generateShadowBCF = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
@Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false)
@Hidden
public boolean USE_SLOW_GENOTYPES = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
/**
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
* VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these
* to fill in any missing ones in input VCF files.
*/
@Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false)
public File repairVCFHeader = null;
}

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
/**
 * A collection of arguments that are common to the various callers.
 * This is pulled out so that every caller isn't exposed to the arguments from every other caller.
 *
 * Author: rpoplin, 8/20/12 (created with IntelliJ IDEA)
 */
public class StandardCallerArgumentCollection {

    /**
     * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
     * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
     */
    @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
    public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;

    /**
     * How the alternate alleles to genotype are determined; defaults to DISCOVERY
     * (alleles found in the reads) as opposed to genotyping a given set of alleles.
     */
    @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
    public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;

    /**
     * Which calls get emitted to the output; defaults to emitting variant sites only.
     */
    @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
    public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;

    /**
     * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
     * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
     * is the default).
     */
    @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
    public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;

    /**
     * This argument allows you to emit low quality calls as filtered records.
     */
    @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
    public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;

    /**
     * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
     */
    @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
    public RodBinding<VariantContext> alleles;

    /**
     * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
     * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
     * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
     * that you not play around with this parameter.
     */
    @Advanced
    @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
    public int MAX_ALTERNATE_ALLELES = 3;
}

View File

@ -118,7 +118,7 @@ class WindowedData {
rec.getAlignmentStart(),
stop);
states = new ArrayList<RMDDataState>();
if (provider != null && provider.getReferenceOrderedData() != null)
if (provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
}

View File

@ -89,6 +89,11 @@ public class SAMDataSource {
*/
private final SAMFileReader.ValidationStringency validationStringency;
/**
* Do we want to remove the program records from this data source?
*/
private final boolean removeProgramRecords;
/**
* Store BAM indices for each reader present.
*/
@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1);
(byte) -1,
false);
}
/**
@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) {
byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null;
validationStringency = strictness;
this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize);
else {
@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers);
availableResources.add(readers);
}
@ -777,9 +785,11 @@ public class SAMDataSource {
/**
* Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load.
* TODO: validationStringency is not used here
* @param validationStringency validation stringency.
* @param removeProgramRecords indicate whether to clear program records from the readers
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}

View File

@ -45,7 +45,6 @@ import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/**
@ -56,30 +55,31 @@ public class ReferenceDataSource {
private IndexedFastaSequenceFile reference;
/** our log, which we want to capture anything from this class */
protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
/**
* Create reference data source from fasta file
* @param fastaFile Fasta file to be used as reference
*/
public ReferenceDataSource(File fastaFile) {
// does the fasta file exist? check that first...
if (!fastaFile.exists())
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
File dictFile;
if (fastaFile.getAbsolutePath().endsWith("fa")) {
dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
}
else
dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict"));
final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
// determine the name for the dict file
final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
/*
if index file does not exist, create it manually
*/
* if index file does not exist, create it manually
*/
if (!indexFile.exists()) {
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
try {
@ -96,7 +96,7 @@ public class ReferenceDataSource {
}
catch(UserException e) {
// Rethrow all user exceptions as-is; there should be more details in the UserException itself.
throw e;
throw e;
}
catch (Exception e) {
// If lock creation succeeded, the failure must have been generating the index.
@ -115,6 +115,8 @@ public class ReferenceDataSource {
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
*/
if (!dictFile.exists()) {
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
/*
@ -219,9 +221,9 @@ public class ReferenceDataSource {
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
shards.add(new LocusShard(parser,
readsDataSource,
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
null));
readsDataSource,
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
null));
}
}
return shards;

View File

@ -58,7 +58,7 @@ import java.util.Collection;
/** Shards and schedules data in manageable chunks. */
public abstract class MicroScheduler implements MicroSchedulerMBean {
protected static Logger logger = Logger.getLogger(MicroScheduler.class);
protected static final Logger logger = Logger.getLogger(MicroScheduler.class);
/**
* Counts the number of instances of the class that are currently alive.

View File

@ -66,13 +66,13 @@ public class TreeReducer implements Callable {
* @return Result of the reduce.
*/
public Object call() {
Object result = null;
Object result;
final long startTime = System.currentTimeMillis();
try {
if( lhs == null )
result = lhs.get();
result = null;
// todo -- what the hell is this above line? Shouldn't it be the two below?
// if( lhs == null )
// throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) );

View File

@ -29,9 +29,19 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
/**
* Filter out reads with wonky cigar strings.
*
* - No reads with Hard/Soft clips in the middle of the cigar
* - No reads starting with deletions (with or without preceding clips)
* - No reads ending in deletions (with or without follow-up clips)
* - No reads that are fully hard or soft clipped
* - No reads that have consecutive indels in the cigar (II, DD, ID or DI)
*
* ps: apparently an empty cigar is okay...
*
* @author ebanks
* @version 0.1
*/
@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter {
public boolean filterOut(final SAMRecord rec) {
final Cigar c = rec.getCigar();
if( c.isEmpty() ) { return false; } // if there is no Cigar then it can't be bad
boolean previousElementWasIndel = false;
CigarOperator lastOp = c.getCigarElement(0).getOperator();
if (lastOp == CigarOperator.D) // filter out reads starting with deletion
return true;
for (CigarElement ce : c.getCigarElements()) {
CigarOperator op = ce.getOperator();
if (op == CigarOperator.D || op == CigarOperator.I) {
if (previousElementWasIndel)
return true; // filter out reads with adjacent I/D
previousElementWasIndel = true;
}
else // this is a regular base (match/mismatch/hard or soft clip)
previousElementWasIndel = false; // reset the previous element
lastOp = op;
// if there is no Cigar then it can't be bad
if( c.isEmpty() ) {
return false;
}
return lastOp == CigarOperator.D;
Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();
CigarOperator firstOp = CigarOperator.H;
while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) {
CigarOperator op = elementIterator.next().getOperator();
// No reads with Hard/Soft clips in the middle of the cigar
if (firstOp != CigarOperator.H && op == CigarOperator.H) {
return true;
}
firstOp = op;
}
// No reads starting with deletions (with or without preceding clips)
if (firstOp == CigarOperator.D) {
return true;
}
boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S);
boolean previousElementWasIndel = firstOp == CigarOperator.I;
CigarOperator lastOp = firstOp;
CigarOperator previousOp = firstOp;
while (elementIterator.hasNext()) {
CigarOperator op = elementIterator.next().getOperator();
if (op != CigarOperator.S && op != CigarOperator.H) {
// No reads with Hard/Soft clips in the middle of the cigar
if (previousOp == CigarOperator.S || previousOp == CigarOperator.H)
return true;
lastOp = op;
if (!hasMeaningfulElements && op.consumesReadBases()) {
hasMeaningfulElements = true;
}
if (op == CigarOperator.I || op == CigarOperator.D) {
// No reads that have consecutive indels in the cigar (II, DD, ID or DI)
if (previousElementWasIndel) {
return true;
}
previousElementWasIndel = true;
}
else {
previousElementWasIndel = false;
}
}
// No reads with Hard/Soft clips in the middle of the cigar
else if (op == CigarOperator.S && previousOp == CigarOperator.H) {
return true;
}
previousOp = op;
}
// No reads ending in deletions (with or without follow-up clips)
// No reads that are fully hard or soft clipped
return lastOp == CigarOperator.D || !hasMeaningfulElements;
}
}

View File

@ -119,7 +119,7 @@ public class ThreadLocalOutputTracker extends OutputTracker {
try {
tempFile = File.createTempFile( stub.getClass().getName(), null );
tempFile.deleteOnExit();
//tempFile.deleteOnExit();
}
catch( IOException ex ) {
throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() );

View File

@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
if (stub.getGenerateMD5())
factory.setCreateMd5File(true);
// Adjust max records in RAM.
// TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
if(stub.getMaxRecordsInRam() != null)
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());

View File

@ -27,9 +27,10 @@ package org.broadinstitute.sting.gatk.io.storage;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -60,6 +61,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
protected final File file;
protected OutputStream stream;
protected final VariantContextWriter writer;
boolean closed = false;
/**
* Constructs an object which will write directly into the output file provided by the stub.
@ -81,6 +83,18 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
}
    /**
     * Constructs an object which will redirect into a different file.
     *
     * Writes the stub's VCF header immediately so the temporary file is a valid
     * variant file from the start.
     *
     * @param stub Stub to use when synthesizing file / header info.
     * @param tempFile File into which to direct the output data.
     */
    public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
        logger.debug("Creating temporary output file " + tempFile.getAbsolutePath() + " for VariantContext output.");
        this.file = tempFile;
        // third argument presumably disables on-the-fly indexing for the temp file -- TODO confirm against vcfWriterToFile
        this.writer = vcfWriterToFile(stub, file, false);
        writer.writeHeader(stub.getVCFHeader());
    }
/**
* common initialization routine for multiple constructors
* @param stub Stub to use when constructing the output file.
@ -139,19 +153,6 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
}
}
/**
* Constructs an object which will redirect into a different file.
* @param stub Stub to use when synthesizing file / header info.
* @param tempFile File into which to direct the output data.
*/
public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
logger.debug("Creating temporary VCF file " + tempFile.getAbsolutePath() + " for VCF output.");
this.file = tempFile;
this.writer = vcfWriterToFile(stub, file, false);
writer.writeHeader(stub.getVCFHeader());
}
public void add(VariantContext vc) {
writer.add(vc);
}
@ -172,20 +173,34 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
if(file != null)
logger.debug("Closing temporary file " + file.getAbsolutePath());
writer.close();
closed = true;
}
public void mergeInto(VariantContextWriterStorage target) {
try {
String sourceFilePath = file.getAbsolutePath();
String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
logger.debug(String.format("Merging %s into %s",sourceFilePath,targetFilePath));
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
if ( ! closed )
throw new ReviewedStingException("Writer not closed, but we are merging into the file!");
final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath));
// use the feature manager to determine the right codec for the tmp file
// that way we don't assume it's a specific type
final FeatureManager.FeatureDescriptor fd = new FeatureManager().getByFiletype(file);
if ( fd == null )
throw new ReviewedStingException("Unexpectedly couldn't find valid codec for temporary output file " + file);
final FeatureCodec<VariantContext> codec = fd.getCodec();
final AbstractFeatureReader<VariantContext> source =
AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), codec, false);
for ( VariantContext vc : source.iterator() ) {
for ( final VariantContext vc : source.iterator() ) {
target.writer.add(vc);
}
source.close();
file.delete(); // this should be last to aid in debugging when the process fails
} catch (UserException e) {
throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
}

View File

@ -47,6 +47,7 @@ import java.util.List;
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
public static final String SITES_ONLY_ARG_NAME = "sites_only";
public static final String FORCE_BCF = "bcf";
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
//
@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition());
return Arrays.asList(
createDefaultArgumentDefinition(source),
createNoCommandLineHeaderArgumentDefinition(),
createSitesOnlyArgumentDefinition(),
createBCFArgumentDefinition() );
}
/**
@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
engine.addOutput(stub);
return stub;
}
@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if(writerFile == null && !source.isRequired())
throw new MissingArgumentValueException(defaultArgumentDefinition);
// Should we compress the output stream?
boolean compress = isCompressed(writerFileName);
boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches);
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
// Create a stub for the given object.
VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes)
: new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes);
final VariantContextWriterStub stub = (writerFile != null)
? new VariantContextWriterStub(engine, writerFile, argumentSources)
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
stub.setCompressed(isCompressed(writerFileName));
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
// WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches));
@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
/**
* Creates the optional compression level argument for the BAM file.
* @return Argument definition for the BAM file itself. Will not be null.
* Creates the optional no_header argument for the VCF file.
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
/**
* Creates the optional compression level argument for the BAM file.
* @return Argument definition for the BAM file itself. Will not be null.
* Creates the optional sites_only argument definition
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
null );
}
/**
* Creates the optional bcf argument definition
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createBCFArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
FORCE_BCF,
FORCE_BCF,
"force BCF output, regardless of the file's extension",
false,
true,
false,
true,
null,
null,
null,
null );
}
/**
* Returns true if the file will be compressed.
* @param writerFileName Name of the file

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.OutputStream;
@ -78,7 +79,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/**
* Should we emit a compressed output stream?
*/
private final boolean isCompressed;
private boolean isCompressed = false;
/**
* A hack: push the argument sources into the VCF header so that the VCF header
@ -89,12 +90,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/**
* Should the header be written out? A hidden argument.
*/
private final boolean skipWritingCommandLineHeader;
private boolean skipWritingCommandLineHeader = false;
/**
* Should we not write genotypes even when provided?
*/
private final boolean doNotWriteGenotypes;
private boolean doNotWriteGenotypes = false;
/**
* Should we force BCF writing regardless of the file extension?
*/
private boolean forceBCF = false;
/**
* Connects this stub with an external stream capable of serving the
@ -107,19 +113,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
*
* @param engine engine.
* @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection<Object> argumentSources) {
this.engine = engine;
this.genotypeFile = genotypeFile;
this.genotypeStream = null;
this.isCompressed = isCompressed;
this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
@ -127,19 +127,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
*
* @param engine engine.
* @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection<Object> argumentSources) {
this.engine = engine;
this.genotypeFile = null;
this.genotypeStream = new PrintStream(genotypeStream);
this.isCompressed = isCompressed;
this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
@ -166,6 +160,22 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
return isCompressed;
}
public void setCompressed(boolean compressed) {
isCompressed = compressed;
}
public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) {
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
}
public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) {
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
public void setForceBCF(boolean forceBCF) {
this.forceBCF = forceBCF;
}
/**
* Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
@ -186,6 +196,9 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
}

View File

@ -159,7 +159,7 @@ public class LocusIteratorByState extends LocusIterator {
return stepForwardOnGenome();
} else {
if (curElement != null && curElement.getOperator() == CigarOperator.D)
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
// we fall into this else block only when indels end the read, increment genomeOffset such that the
@ -185,7 +185,7 @@ public class LocusIteratorByState extends LocusIterator {
break;
case D: // deletion w.r.t. the reference
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
// should be the same as N case
genomeOffset++;
done = true;
@ -195,6 +195,8 @@ public class LocusIteratorByState extends LocusIterator {
done = true;
break;
case M:
case EQ:
case X:
readOffset++;
genomeOffset++;
done = true;
@ -279,7 +281,6 @@ public class LocusIteratorByState extends LocusIterator {
*/
private void lazyLoadNextAlignmentContext() {
while (nextAlignmentContext == null && readStates.hasNext()) {
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
readStates.collectPendingReads();
final GenomeLoc location = getLocation();
@ -378,7 +379,7 @@ public class LocusIteratorByState extends LocusIterator {
CigarOperator op = state.stepForwardOnGenome();
if (op == null) {
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
it.remove(); // we've stepped off the end of the object
}

View File

@ -86,13 +86,14 @@ public class GATKRunReport {
private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
// number of milliseconds before the S3 put operation is timed-out:
private static final long S3PutTimeOut = 30 * 1000;
private static final long S3PutTimeOut = 10 * 1000;
public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest";
/**
* our log
*/
protected static Logger logger = Logger.getLogger(GATKRunReport.class);
protected static final Logger logger = Logger.getLogger(GATKRunReport.class);
@Element(required = false, name = "id")

View File

@ -163,43 +163,58 @@ public class VariantContextAdaptors {
@Override
public VariantContext convert(String name, Object input, ReferenceContext ref) {
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
// add all of the alt alleles
boolean sawNullAllele = refAllele.isNull();
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
Allele altAllele = Allele.create(alt, false);
alleles.add(altAllele);
if ( altAllele.isNull() )
sawNullAllele = true;
}
final byte refBaseForIndel = ref.getBases()[index];
Map<String, Object> attributes = new HashMap<String, Object>();
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
builder.alleles(alleles);
builder.referenceBaseForIndel(refBaseForIndel);
return builder.make();
} else
boolean addPaddingBase;
if ( isSNP(dbsnp) || isMNP(dbsnp) )
addPaddingBase = false;
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
else
return null; // can't handle anything else
Allele refAllele;
if ( dbsnp.getNCBIRefBase().equals("-") )
refAllele = Allele.create(refBaseForIndel, true);
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
else
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( Allele.wouldBeNullAllele(alt.getBytes()))
alt = "";
else if ( ! Allele.acceptableAlleleBases(alt) )
return null;
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
}
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
builder.alleles(alleles);
return builder.make();
}
private static List<String> stripNullDashes(final List<String> alleles) {
final List<String> newAlleles = new ArrayList<String>(alleles.size());
for ( final String allele : alleles ) {
if ( allele.equals("-") )
newAlleles.add("");
else
newAlleles.add(allele);
}
return newAlleles;
}
}
@ -294,7 +309,6 @@ public class VariantContextAdaptors {
int index = hapmap.getStart() - ref.getWindow().getStart();
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
HashSet<Allele> alleles = new HashSet<Allele>();
Allele refSNPAllele = Allele.create(ref.getBase(), true);
@ -351,7 +365,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd();
if ( deletionLength > 0 )
end += deletionLength;
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
return vc;
}
}

View File

@ -85,18 +85,16 @@ public class FeatureManager {
private final PluginManager<FeatureCodec> pluginManager;
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
private final VCFHeader headerForRepairs;
private final boolean lenientVCFProcessing;
/**
* Construct a FeatureManager without a master VCF header
*/
public FeatureManager() {
this(null, false);
this(false);
}
public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) {
this.headerForRepairs = headerForRepairs;
public FeatureManager(final boolean lenientVCFProcessing) {
this.lenientVCFProcessing = lenientVCFProcessing;
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
@ -255,8 +253,6 @@ public class FeatureManager {
((NameAwareCodec)codex).setName(name);
if ( codex instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
if ( codex instanceof VCFCodec )
((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
((AbstractVCFCodec)codex).disableOnTheFlyModifications();

View File

@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
* please talk through your approach with the SE team.
* @param dict Sequence dictionary to use.
* @param genomeLocParser Location parser to use.
* @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
*/
public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser,
final VCFHeader headerForRepairs,
ValidationExclusion.TYPE validationExclusionType) {
this.dict = dict;
this.validationExclusionType = validationExclusionType;
this.genomeLocParser = genomeLocParser;
this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
}
/**
@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
return featureManager;
}
/**
* Same as full constructor but makes one without a header for repairs
* @param dict
* @param genomeLocParser
* @param validationExclusionType
*/
public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
this(dict, genomeLocParser, null, validationExclusionType);
}
/**
* create a RMDTrack of the specified type
*

View File

@ -89,9 +89,9 @@ public class GATKReport {
reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
} catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file);
throw new UserException.CouldNotReadInputFile(file, e);
}

View File

@ -208,11 +208,23 @@ public class GATKReportTable {
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
* Create a new GATKReportTable with the same structure
* @param tableToCopy
*/
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID);
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
addColumn(column.getColumnName(), column.getFormat());
if ( copyData )
throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported");
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
*/
private boolean isValidName(String name) {
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
Matcher m = p.matcher(name);
@ -490,6 +502,17 @@ public class GATKReportTable {
return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName));
}
/**
* Get a value from the given position in the table
*
* @param rowIndex the row ID
* @param columnName the name of the column
* @return the value stored at the specified position in the table
*/
public Object get(final int rowIndex, final String columnName) {
return get(rowIndex, columnNameToIndex.get(columnName));
}
/**
* Get a value from the given position in the table
*

View File

@ -62,54 +62,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
}
/**
* Simple utility class that makes it convenient to print unit adjusted times
*/
private static class MyTime {
double t; // in Seconds
int precision; // for format
public MyTime(double t, int precision) {
this.t = t;
this.precision = precision;
}
public MyTime(double t) {
this(t, 1);
}
/**
* Instead of 10000 s, returns 2.8 hours
* @return
*/
public String toString() {
double unitTime = t;
String unit = "s";
if ( t > 120 ) {
unitTime = t / 60; // minutes
unit = "m";
if ( unitTime > 120 ) {
unitTime /= 60; // hours
unit = "h";
if ( unitTime > 100 ) {
unitTime /= 24; // days
unit = "d";
if ( unitTime > 20 ) {
unitTime /= 7; // days
unit = "w";
}
}
}
}
return String.format("%6."+precision+"f %s", unitTime, unit);
}
}
/** lock object to sure updates to history are consistent across threads */
private static final Object lock = new Object();
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
@ -140,7 +92,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
GenomeLocSortedSet targetIntervals = null;
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
protected GenomeAnalysisEngine engine;
@ -280,20 +232,20 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
final MyTime elapsed = new MyTime(last.elapsedSeconds);
final MyTime bpRate = new MyTime(secondsPerMillionBP(last));
final MyTime unitRate = new MyTime(secondsPerMillionElements(last));
final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds);
final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last));
final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last));
final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted);
final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t);
final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted);
final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds());
if ( printProgress ) {
lastProgressPrintTime = curTime;
// dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS )
if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS )
else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
else
PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
@ -308,8 +260,9 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
lastPerformanceLogPrintTime = curTime;
synchronized(performanceLogLock) {
performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n",
elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t,
fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t);
elapsed.getTimeInSeconds(), nRecords, unitRate.getTimeInSeconds(), last.bpProcessed,
bpRate.getTimeInSeconds(), fractionGenomeTargetCompleted, estTotalRuntime.getTimeInSeconds(),
timeToCompletion.getTimeInSeconds());
}
}
}
@ -401,7 +354,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
synchronized(performanceLogLock) {
// Ignore multiple calls to reset the same lock.
if(performanceLogFile != null && performanceLogFile.equals(fileName))
if(performanceLogFile != null && performanceLogFile.equals(file))
return;
// Close an existing log

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -28,7 +29,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
/**
* our log, which we want to capture anything from this class
*/
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
@ -69,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
profile.add(fakeLoc, isActiveProb);
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
}
}
}
@ -86,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
profile.add(location, isActiveProb);
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
}
// Grab all the previously unseen reads from this pileup and add them to the massive read list
@ -144,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
//
// --------------------------------------------------------------------------------
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) {
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else {
return walker.isActive( tracker, refContext, locus );
}

View File

@ -19,7 +19,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
/**
* our log, which we want to capture anything from this class
*/
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
@Override
protected String getTraversalType() {

View File

@ -24,7 +24,7 @@ import java.util.List;
public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReadPairs.class);
protected static final Logger logger = Logger.getLogger(TraverseReadPairs.class);
@Override
protected String getTraversalType() {

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/
public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReads.class);
protected static final Logger logger = Logger.getLogger(TraverseReads.class);
@Override
protected String getTraversalType() {
@ -75,8 +75,6 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
if( !dataProvider.hasReads() )
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
ReadView reads = new ReadView(dataProvider);
ReadReferenceView reference = new ReadReferenceView(dataProvider);
@ -91,7 +89,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
ReferenceContext refContext = null;
// get the array of characters for the reference sequence, since we're a mapped read
if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference())
if (!read.getReadUnmappedFlag() && dataProvider.hasReference())
refContext = reference.getReferenceContext(read);
// update the number of reads we've seen

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -27,10 +28,11 @@ import java.util.List;
*/
@By(DataSource.READS)
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
@ -72,7 +74,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
}
// Determine probability of active status over the AlignmentContext
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);

View File

@ -574,7 +574,7 @@ public class ClipReads extends ReadWalker<ClipReads.ReadClipperWithData, ClipRea
}
}
public class ReadClipperWithData extends ReadClipper {
public static class ReadClipperWithData extends ReadClipper {
private ClippingData data;
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {

View File

@ -16,8 +16,18 @@ package org.broadinstitute.sting.gatk.walkers;
* Allow user to choose between a number of different data sources.
*/
public enum DataSource {
/**
* Does this walker require read (BAM) data to work?
*/
READS,
/**
* Does this walker require reference data to work?
*/
REFERENCE,
REFERENCE_BASES, // Do I actually need the reference bases passed to the walker?
/**
* Does this walker require reference order data (VCF) to work?
*/
REFERENCE_ORDERED_DATA
}

View File

@ -16,9 +16,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@Requires({DataSource.READS,DataSource.REFERENCE})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -64,9 +64,17 @@ import java.util.List;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
@Output
PrintStream out;
/**
* In addition to the standard pileup output, adds 'verbose' output too. The verbose output contains the number of spanning deletions,
* and for each read in the pileup it has the read name, offset in the base string, read length, and read mapping quality. These per
* read items are delimited with an '@' character.
*/
@Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output")
public boolean SHOW_VERBOSE = false;
@ -116,8 +124,6 @@ public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducib
return rodString;
}
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
private static String createVerboseOutput(final ReadBackedPileup pileup) {
final StringBuilder sb = new StringBuilder();
boolean isFirst = true;

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
* Time: 2:52:28 PM
* To change this template use File | Settings | File Templates.
*/
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ)
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
public boolean requiresOrderedReads() { return false; }

View File

@ -8,7 +8,7 @@ package org.broadinstitute.sting.gatk.walkers;
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.REFERENCE)
@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@Requires({DataSource.REFERENCE})
@Allows(DataSource.REFERENCE)
public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
}

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 8/2/12
* Time: 1:58 PM
* To change this template use File | Settings | File Templates.
*/
import java.lang.annotation.*;
/**
* Indicates that program records should be removed from SAM headers by default for this walker
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface RemoveProgramRecords {
}

View File

@ -65,12 +65,12 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
// by design, first element in LinkedHashMap was ref allele
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) {
for (Map.Entry<Allele, Double> entry : el.entrySet()) {
if (a.isReference())
refLikelihood =el.get(a);
if (entry.getKey().isReference())
refLikelihood = entry.getValue();
else {
double like = el.get(a);
double like = entry.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}

View File

@ -22,19 +22,10 @@ import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL.
*
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
* -dcov D is N * D
* While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
* internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
* N samples with -dcov D is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {

View File

@ -24,10 +24,10 @@ import java.util.List;
/**
* The depth of coverage of each VCF allele in this sample.
*
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this
* sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the
* Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of
* REF and ALT fields) is the unfiltered count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
@ -35,17 +35,13 @@ import java.util.List;
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
* are unambiguously informative about the alternate allele). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
* determine the genotype calls.
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static final String REF_ALLELE = "REF";
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
if ( g == null || !g.isCalled() )
return;
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
if ( vc.isSNP() )
annotateSNP(stratifiedContext, vc, gb);
else if ( vc.isIndel() )
annotateIndel(stratifiedContext, vc, gb);
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
}
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlleles() )
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
gb.AD(counts);
}
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
if ( pileup == null )
return;
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
alleleCounts.put(REF_ALLELE, 0);
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
final Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
}
for ( PileupElement p : pileup ) {
if ( p.isBeforeInsertion() ) {
final String b = p.getEventBases();
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
if ( alleleCounts.containsKey(insertion) ) {
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
}
} else if ( p.isBeforeDeletionStart() ) {
if ( p.getEventLength() == refAllele.length() ) {
// this is indeed the deletion allele recorded in VC
final String b = DEL;
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
if ( p.getEventLength() == refAllele.length() - 1 ) {
// this is indeed the deletion allele recorded in VC
final Allele deletion = Allele.create(refBase);
if ( alleleCounts.containsKey(deletion) ) {
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
}
}
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
}
}
int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(REF_ALLELE);
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(refAllele);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
gb.AD(counts);
}
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }

View File

@ -291,8 +291,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
int[][] table = new int[2][2];
for ( String sample : stratifiedContexts.keySet() ) {
final AlignmentContext context = stratifiedContexts.get(sample);
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final AlignmentContext context = sample.getValue();
if ( context == null )
continue;
@ -313,12 +313,12 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) {
for (Map.Entry<Allele,Double> entry : el.entrySet()) {
if (a.isReference())
refLikelihood =el.get(a);
if (entry.getKey().isReference())
refLikelihood = entry.getValue();
else {
double like = el.get(a);
double like = entry.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}

View File

@ -103,7 +103,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return map;
}
private class HaplotypeComparator implements Comparator<Haplotype> {
private static class HaplotypeComparator implements Comparator<Haplotype> {
public int compare(Haplotype a, Haplotype b) {
if (a.getQualitySum() < b.getQualitySum())
@ -362,8 +362,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
// Score all the reads in the pileup, even the filtered ones
final double[] scores = new double[el.size()];
int i = 0;
for (Allele a : el.keySet()) {
scores[i++] = -el.get(a);
for (Map.Entry<Allele, Double> a : el.entrySet()) {
scores[i++] = -a.getValue();
if (DEBUG) {
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
}

View File

@ -61,12 +61,12 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
// by design, first element in LinkedHashMap was ref allele
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) {
for (Map.Entry<Allele,Double> a : el.entrySet()) {
if (a.isReference())
refLikelihood =el.get(a);
if (a.getKey().isReference())
refLikelihood = a.getValue();
else {
double like = el.get(a);
double like = a.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}

View File

@ -87,11 +87,11 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read
double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele
for (Allele a : el.keySet()) {
if (a.isReference())
refLikelihood = el.get(a);
for (Map.Entry<Allele,Double> a : el.entrySet()) {
if (a.getKey().isReference())
refLikelihood = a.getValue();
else {
double like = el.get(a);
double like = a.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}
@ -100,7 +100,6 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
final int numAlignedBases = getNumAlignedBases(p.getRead());
int rp = readPos;
if (readPos > numAlignedBases / 2) {
readPos = numAlignedBases - (readPos + 1);
}

View File

@ -66,8 +66,8 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
return map;
}
public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
public static final VCFInfoHeaderLine[] descriptions = {
protected static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
protected static final VCFInfoHeaderLine[] descriptions = {
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -142,9 +141,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle"));
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle"));
// Open output file specified by output VCF ROD
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
if ( comp.isBound() ) {
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
@ -250,8 +246,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
String refString = vc_input.getReference().getDisplayString();
if (refString.length() == 0) // ref was null
refString = Allele.NULL_ALLELE_STRING;
Allele bglAlleleA, bglAlleleB;

View File

@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
for ( Allele allele : preferredVC.getAlleles() ) {
String bglPrintString;
if (allele.isNoCall() || allele.isNull())
if (allele.isNoCall())
bglPrintString = "-";
else
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
@ -351,7 +351,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
}
public static class CachingFormatter {
private int maxCacheSize = 0;
private String format;
private LRUCache<Double, String> cache;
@ -379,7 +378,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
}
public CachingFormatter(String format, int maxCacheSize) {
this.maxCacheSize = maxCacheSize;
this.format = format;
this.cache = new LRUCache<Double, String>(maxCacheSize);
}

View File

@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
// write out the alleles at this site
for ( Allele allele : vc.getAlleles() ) {
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" ");
beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
}
// write out sample level genotypes

View File

@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Gatherer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import java.io.File;
import java.io.FileNotFoundException;
@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer {
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original");
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal");
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
generalReport.output(outputFile);

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.collections.Pair;
@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -103,13 +107,13 @@ import java.util.ArrayList;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By(DataSource.READS)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) // filter out all reads with zero or unavailable mapping quality
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private RecalibrationTables recalibrationTables;
@ -132,6 +136,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
*/
public void initialize() {
// TODO -- remove me after the 2.1 release
if ( getToolkit().getArguments().numberOfThreads > 1 )
throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons. We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience.");
// check for unsupported access
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
@ -143,12 +151,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
if (RAC.LIST_ONLY) {
RecalDataManager.listAvailableCovariates(logger);
RecalUtils.listAvailableCovariates(logger);
System.exit(0);
}
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
@ -222,17 +230,17 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
if (readNotSeen(read)) {
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
RecalDataManager.parsePlatformForRead(read, RAC);
if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
RecalUtils.parsePlatformForRead(read, RAC);
if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
continue;
}
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
}
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
RecalDataManager.isColorSpaceConsistent(read, offset))
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
RecalUtils.isColorSpaceConsistent(read, offset))
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
}
countedSites++;
@ -242,9 +250,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
}
/**
* Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
* Initialize the reduce step by returning 0L
*
* @return returns A PrintStream created from the -recalFile filename argument specified to the walker
* @return returns 0L
*/
public Long reduceInit() {
return 0L;
@ -271,13 +279,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
public void onTraversalDone(Long result) {
logger.info("Calculating quantized quality scores...");
quantizeQualityScores();
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
if (!RAC.NO_PLOTS) {
logger.info("Generating recalibration plots...");
generatePlots();
}
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
logger.info("Processed: " + result + " sites");
}
@ -285,10 +296,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
else
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
@ -309,7 +320,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
}
RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
}
}

View File

@ -1,109 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jan 6, 2010
*
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class Datum {
long numObservations; // number of bases seen in total
long numMismatches; // number of bases seen that didn't match the reference
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public Datum() {
numObservations = 0L;
numMismatches = 0L;
}
public Datum(long numObservations, long numMismatches) {
this.numObservations = numObservations;
this.numMismatches = numMismatches;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
}
synchronized void increment(final boolean isError) {
numObservations++;
numMismatches += isError ? 1:0;
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
double empiricalQualDouble() {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
byte empiricalQualByte() {
final double doubleMismatches = (double) (numMismatches);
final double doubleObservations = (double) (numObservations);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}
@Override
public boolean equals(Object o) {
if (!(o instanceof Datum))
return false;
Datum other = (Datum) o;
return numMismatches == other.numMismatches && numObservations == other.numObservations;
}
}

View File

@ -1,148 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Random;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum extends Datum {
private static final double UNINITIALIZED = -1.0;
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
numObservations = _numObservations;
numMismatches = _numMismatches;
estimatedQReported = reportedQuality;
empiricalQuality = UNINITIALIZED;
}
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.numObservations;
this.numMismatches = copy.numMismatches;
this.estimatedQReported = copy.estimatedQReported;
this.empiricalQuality = copy.empiricalQuality;
}
public void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
increment(other.numObservations, other.numMismatches);
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
empiricalQuality = UNINITIALIZED;
}
@Override
public void increment(final boolean isError) {
super.increment(isError);
empiricalQuality = UNINITIALIZED;
}
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
protected final void calcEmpiricalQuality() {
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public void setEstimatedQReported(final double estimatedQReported) {
this.estimatedQReported = estimatedQReported;
}
public final double getEstimatedQReported() {
return estimatedQReported;
}
public void setEmpiricalQuality(final double empiricalQuality) {
this.empiricalQuality = empiricalQuality;
}
public final double getEmpiricalQuality() {
if (empiricalQuality == UNINITIALIZED)
calcEmpiricalQuality();
return empiricalQuality;
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}
public String stringForCSV() {
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
}
/**
* We don't compare the estimated quality reported because it may be different when read from
* report tables.
*
* @param o the other recal datum
* @return true if the two recal datums have the same number of observations, errors and empirical quality.
*/
@Override
public boolean equals(Object o) {
if (!(o instanceof RecalDatum))
return false;
RecalDatum other = (RecalDatum) o;
return super.equals(o) &&
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
}
}

View File

@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File;
import java.util.Collections;
@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
* reads which have had the reference inserted because of color space inconsistencies.
*/
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
/**
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
* their color space tag can not be recalibrated.
*/
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
/**
* The context covariate will use a context of this size to calculate it's covariate value for base mismatches
@ -174,44 +175,44 @@ public class RecalibrationArgumentCollection {
public File recalibrationReport = null;
public GATKReportTable generateReportTable() {
public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
argumentsTable.addColumn("Argument");
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addRowID("covariate", true);
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
argumentsTable.addRowID("no_standard_covs", true);
argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.addRowID("run_without_dbsnp", true);
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.addRowID("solid_recal_mode", true);
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.addRowID("solid_nocall_strategy", true);
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.addRowID("mismatches_context_size", true);
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("indels_context_size", true);
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true);
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.addRowID("low_quality_tail", true);
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.addRowID("default_platform", true);
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.addRowID("force_platform", true);
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
@ -45,15 +47,17 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
@Output(required = true)
private PrintStream out;
@Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
private int coverageThreshold = 20;
@Override
// Look to see if the region has sufficient coverage
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
// note the linear probability scale
int coverageThreshold = 20;
return Math.min((double) depth / coverageThreshold, 1);
return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
}
@ -74,9 +78,9 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
public Long reduce(final GenomeLoc value, Long reduce) {
if (value != null) {
out.println(value.toString());
return reduce++;
} else
return reduce;
reduce++;
}
return reduce;
}
@Override

View File

@ -264,7 +264,7 @@ class SampleStatistics {
return false;
// different contigs
if (read.getMateReferenceIndex() != read.getReferenceIndex())
if (!read.getMateReferenceIndex().equals(read.getReferenceIndex()))
return false;
// unmapped

View File

@ -104,7 +104,9 @@ public class BAMDiffableReader implements DiffableReader {
InputStream fstream = new BufferedInputStream(new FileInputStream(file));
if ( !BlockCompressedInputStream.isValidFile(fstream) )
return false;
new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
final BlockCompressedInputStream BCIS = new BlockCompressedInputStream(fstream);
BCIS.read(buffer, 0, BAM_MAGIC.length);
BCIS.close();
return Arrays.equals(buffer, BAM_MAGIC);
} catch ( IOException e ) {
return false;

View File

@ -224,7 +224,7 @@ public class DiffNode extends DiffValue {
// X=(A=A B=B C=(D=D))
String[] parts = tree.split("=", 2);
if ( parts.length != 2 )
throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts);
throw new ReviewedStingException("Unexpected tree structure: " + tree);
String name = parts[0];
String value = parts[1];

View File

@ -90,8 +90,10 @@ public class GATKReportDiffableReader implements DiffableReader {
public boolean canRead(File file) {
try {
final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX;
char[] buff = new char[HEADER.length()];
new FileReader(file).read(buff, 0, HEADER.length());
final char[] buff = new char[HEADER.length()];
final FileReader FR = new FileReader(file);
FR.read(buff, 0, HEADER.length());
FR.close();
String firstLine = new String(buff);
return firstLine.startsWith(HEADER);
} catch (IOException e) {

View File

@ -47,7 +47,10 @@ import java.util.List;
* <p>
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
* Note that if there are multiple variants at a site, it takes the first one seen.
* Several important notes:
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
* 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
*
* <h2>Input</h2>
@ -102,16 +105,16 @@ public class FastaAlternateReference extends FastaReference {
String refBase = String.valueOf((char)ref.getBase());
// Check to see if we have a called snp
for ( VariantContext vc : tracker.getValues(variants) ) {
for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
if ( vc.isFiltered() )
continue;
if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length();
deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
}

View File

@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model {
/** The default model with the best performance in all cases */
EXACT,
POOL
EXACT
}
protected int N;

View File

@ -148,8 +148,8 @@ public class ConsensusAlleleCounter {
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
for (Map.Entry<String, Integer> s : consensusIndelStrings.entrySet()) {
cList.add(new Pair<String, Integer>(s.getKey(), s.getValue()));
}
if (read.getAlignmentEnd() == loc.getStart()) {
@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
// get ref bases of accurate deletion
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
if (Allele.acceptableAlleleBases(refBases, false)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
altAllele = Allele.create(ref.getBase(), false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
final String insertionBases = (char)ref.getBase() + s; // add reference padding
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
refAllele = Allele.create(ref.getBase(), true);
altAllele = Allele.create(insertionBases, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls) {
vcs.add(builder.make());

View File

@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model {
SNP,
INDEL,
BOTH,
POOLSNP,
POOLINDEL,
POOLBOTH
GeneralPloidySNP,
GeneralPloidyINDEL,
BOTH
}
public enum GENOTYPING_MODE {

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
haplotypeMap.clear();
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
alleleList = pair.first;
allelesArePadded = pair.second;
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
if (alleleList.isEmpty())
return null;
}
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
if (haplotypeMap == null || haplotypeMap.isEmpty())
return null;
// start making the VariantContext
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
final int eventLength = getEventLength(alleleList);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get();
}
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
Allele refAllele = alleles.get(0);
int endLoc = loc.getStart() + refAllele.length()-1;
if (allelesArePadded)
endLoc++;
return endLoc;
}
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
final ReferenceContext ref,
final GenomeLoc loc,
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType,
final GenomeLocParser locParser,
final UnifiedArgumentCollection UAC,
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>();
boolean allelesArePadded = true;
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
// ignore places where we don't have a variant
if (vc == null)
return new Pair<List<Allele>,Boolean>(alleles,false);
return alleles;
if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} else {
alleles.addAll(vc.getAlleles());
}
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
} else {
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
}
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
return alleles;
}
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,

View File

@ -208,7 +208,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
}
public class BAQedPileupElement extends PileupElement {
public static class BAQedPileupElement extends PileupElement {
public BAQedPileupElement( final PileupElement PE ) {
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip());
}

View File

@ -26,11 +26,12 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
public class UnifiedArgumentCollection {
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
@ -42,13 +43,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
/**
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
*/
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
/**
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
* distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it
@ -57,26 +51,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
/**
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
* is the default).
*/
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
/**
* This argument allows you to emit low quality calls as filtered records.
*/
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/**
* Note that calculating the SLOD increases the runtime by an appreciable amount.
*/
@ -90,12 +64,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
/**
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
*/
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles;
/**
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
@ -107,18 +75,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
* that you not play around with this parameter.
*/
@Advanced
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 3;
@Hidden
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false;
// indel-related arguments
@ -139,19 +97,18 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
/**
* This argument informs the prior probability of having an indel at a site.
*/
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000;
@Hidden
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
@Advanced
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
@Hidden
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
@Advanced
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
public byte INDEL_GAP_OPEN_PENALTY = 45;
@Hidden
@ -181,7 +138,6 @@ public class UnifiedArgumentCollection {
Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
sample-level error model
*/
@Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
public boolean IGNORE_LANE_INFO = false;
@ -263,7 +219,6 @@ public class UnifiedArgumentCollection {
uac.referenceSampleName = referenceSampleName;
uac.samplePloidy = samplePloidy;
uac.maxQualityScore = minQualityScore;
uac.maxQualityScore = maxQualityScore;
uac.phredScaledPrior = phredScaledPrior;
uac.minPower = minPower;
uac.minReferenceDepth = minReferenceDepth;
@ -276,5 +231,16 @@ public class UnifiedArgumentCollection {
return uac;
}
/** Default constructor: leaves every argument field at its declared default value. */
public UnifiedArgumentCollection() { }
/**
 * Copy-initializing constructor: seeds this collection from a
 * {@link StandardCallerArgumentCollection}, carrying over the arguments the
 * two collections share (genotyping and output modes, calling/emitting
 * confidence thresholds, heterozygosity, the alternate-allele cap, and the
 * GENOTYPE_GIVEN_ALLELES rod binding).
 *
 * @param SCAC the standard caller argument collection whose values are copied
 */
public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) {
    // Copy each shared argument field verbatim; the assignments are
    // independent of one another, so their order is immaterial.
    GenotypingMode = SCAC.GenotypingMode;
    OutputMode = SCAC.OutputMode;
    STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
    STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
    heterozygosity = SCAC.heterozygosity;
    MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
    alleles = SCAC.alleles;
}
}

View File

@ -82,7 +82,7 @@ import java.util.*;
* -o snps.raw.vcf \
* -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \
* -dcov [50] \
* -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
* [-L targets.interval_list]
* </pre>
*
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
} else {
// in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
}
*/
// get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear();
@ -311,8 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// add the pool values for each genotype
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed, for this pool"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed, for this pool"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
}
if (UAC.referenceSampleName != null) {
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));

Some files were not shown because too many files have changed in this diff Show More