Merge remote-tracking branch 'unstable/master'
This commit is contained in:
commit
5b1781fdac
33
build.xml
33
build.xml
|
|
@ -76,7 +76,8 @@
|
|||
<property name="staging.dir" value="staging" />
|
||||
<property name="default.executable" value="none" />
|
||||
|
||||
<!-- Javadoc/Scaladoc directories -->
|
||||
<!-- GATKDocs/Javadoc/Scaladoc directories -->
|
||||
<property name="gatkdocs.dir" value="gatkdocs" />
|
||||
<property name="javadoc.dir" value="javadoc" />
|
||||
<property name="scaladoc.dir" value="scaladoc" />
|
||||
|
||||
|
|
@ -92,12 +93,10 @@
|
|||
<!-- To disable for test targets, run with -Duse.contracts=false -->
|
||||
<!-- To enable for non-test targets, run with -Duse.contracts=true -->
|
||||
<property name="java.contracts.dir" value="${build.dir}/java/contracts" />
|
||||
<property name="contracts.version" value="1.0-20110609" />
|
||||
<property name="contracts.version" value="1.0-r139" />
|
||||
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
|
||||
<property name="contract.dump.dir" value="dump" />
|
||||
|
||||
<property name="gatkdocs.dir" value="gatkdocs" />
|
||||
|
||||
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
|
||||
<property name="halt" value="yes" />
|
||||
|
||||
|
|
@ -208,19 +207,19 @@
|
|||
<include name="**/*.java" />
|
||||
</fileset>
|
||||
|
||||
<pathconvert property="external.build.dir">
|
||||
<path path="${java.classes}"/>
|
||||
</pathconvert>
|
||||
<path id="external.build.dir">
|
||||
<path path="${java.classes}" />
|
||||
</path>
|
||||
|
||||
<pathconvert property="external.dist.dir">
|
||||
<path id="external.dist.dir">
|
||||
<path path="${dist.dir}" />
|
||||
</pathconvert>
|
||||
</path>
|
||||
|
||||
<!-- GATK dependencies consist of 3rd party plugins plus compiled GATK classes -->
|
||||
<pathconvert property="external.gatk.classpath">
|
||||
<path id="external.gatk.classpath">
|
||||
<path path="${java.classes}"/>
|
||||
<path refid="external.dependencies" />
|
||||
</pathconvert>
|
||||
</path>
|
||||
|
||||
<!-- the path for resources that need to go into the GATK jar;
|
||||
any additional resources should go into this set. -->
|
||||
|
|
@ -430,9 +429,9 @@
|
|||
|
||||
<target name="gatk.compile.external.source" depends="gatk.compile.internal.source" if="include.external">
|
||||
<subant target="compile" genericantfile="build.xml">
|
||||
<property name="build.dir" value="${external.build.dir}" />
|
||||
<property name="dist.dir" value="${external.dist.dir}" />
|
||||
<property name="gatk.classpath" value="${external.gatk.classpath}" />
|
||||
<property name="build.dir" refid="external.build.dir" />
|
||||
<property name="dist.dir" refid="external.dist.dir" />
|
||||
<property name="gatk.classpath" refid="external.gatk.classpath" />
|
||||
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
|
||||
</subant>
|
||||
</target>
|
||||
|
|
@ -680,9 +679,9 @@
|
|||
</jar>
|
||||
|
||||
<subant target="dist" genericantfile="build.xml">
|
||||
<property name="build.dir" value="${external.build.dir}" />
|
||||
<property name="dist.dir" value="${external.dist.dir}" />
|
||||
<property name="gatk.classpath" value="${external.gatk.classpath}" />
|
||||
<property name="build.dir" refid="external.build.dir" />
|
||||
<property name="dist.dir" refid="external.dist.dir" />
|
||||
<property name="gatk.classpath" refid="external.gatk.classpath" />
|
||||
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
|
||||
</subant>
|
||||
</target>
|
||||
|
|
|
|||
4
ivy.xml
4
ivy.xml
|
|
@ -52,7 +52,7 @@
|
|||
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
|
||||
|
||||
<!-- Dependencies for the graph aligner -->
|
||||
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
|
||||
<dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
|
||||
|
||||
<!-- Dependencies for the html walker documention -->
|
||||
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
|
||||
|
|
@ -87,7 +87,7 @@
|
|||
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
|
||||
|
||||
<!-- Contracts for Java and dependencies -->
|
||||
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-20110609"/>
|
||||
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
|
||||
<dependency org="asm" name="asm-all" rev="3.3.1"/>
|
||||
|
||||
<!-- POI, for reading pipeline files -->
|
||||
|
|
|
|||
|
|
@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -30,24 +35,26 @@ public class ErrorModel {
|
|||
private static final boolean compressRange = false;
|
||||
|
||||
private static final double log10MinusE = Math.log10(Math.exp(1.0));
|
||||
|
||||
private static final boolean DEBUG = false;
|
||||
/**
|
||||
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
|
||||
*
|
||||
* @param minQualityScore Minimum site quality score to evaluate
|
||||
* @param maxQualityScore Maximum site quality score to evaluate
|
||||
* @param phredScaledPrior Prior for site quality
|
||||
* @param UAC Argument Collection
|
||||
* @param refSamplePileup Reference sample pileup
|
||||
* @param refSampleVC VC with True alleles in reference sample pileup
|
||||
* @param minPower Minimum power
|
||||
*/
|
||||
public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior,
|
||||
ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) {
|
||||
this.maxQualityScore = maxQualityScore;
|
||||
this.minQualityScore = minQualityScore;
|
||||
this.phredScaledPrior = phredScaledPrior;
|
||||
log10minPower = Math.log10(minPower);
|
||||
public ErrorModel (final UnifiedArgumentCollection UAC,
|
||||
final ReadBackedPileup refSamplePileup,
|
||||
VariantContext refSampleVC, final ReferenceContext refContext) {
|
||||
this.maxQualityScore = UAC.maxQualityScore;
|
||||
this.minQualityScore = UAC.minQualityScore;
|
||||
this.phredScaledPrior = UAC.phredScaledPrior;
|
||||
log10minPower = Math.log10(UAC.minPower);
|
||||
|
||||
PairHMMIndelErrorModel pairModel = null;
|
||||
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
|
||||
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
|
||||
double[][] perReadLikelihoods = null;
|
||||
|
||||
double[] model = new double[maxQualityScore+1];
|
||||
Arrays.fill(model,Double.NEGATIVE_INFINITY);
|
||||
|
|
@ -61,11 +68,17 @@ public class ErrorModel {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||
if (refSampleVC.isIndel()) {
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
|
||||
}
|
||||
}
|
||||
|
||||
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
||||
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
|
||||
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||
// maximum uncertainty if there's no ref data at site
|
||||
model[q] = p;
|
||||
|
|
@ -75,23 +88,48 @@ public class ErrorModel {
|
|||
else {
|
||||
hasData = true;
|
||||
int matches = 0;
|
||||
int coverage = refSamplePileup.getNumberOfElements();
|
||||
int coverage = 0;
|
||||
|
||||
Allele refAllele = refSampleVC.getReference();
|
||||
|
||||
if (refSampleVC.isIndel()) {
|
||||
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
|
||||
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
|
||||
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
|
||||
if (!haplotypeMap.isEmpty())
|
||||
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
||||
}
|
||||
int idx = 0;
|
||||
for (PileupElement refPileupElement : refSamplePileup) {
|
||||
if (DEBUG)
|
||||
System.out.println(refPileupElement.toString());
|
||||
boolean isMatch = false;
|
||||
for (Allele allele : refSampleVC.getAlleles())
|
||||
isMatch |= pileupElementMatches(refPileupElement, allele, refAllele);
|
||||
for (Allele allele : refSampleVC.getAlleles()) {
|
||||
boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase());
|
||||
if (DEBUG) System.out.println(m);
|
||||
isMatch |= m;
|
||||
}
|
||||
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
|
||||
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
|
||||
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
|
||||
if (!isInformativeElement(perAlleleLikelihoods))
|
||||
matches++;
|
||||
else
|
||||
matches += (isMatch?1:0);
|
||||
|
||||
matches += (isMatch?1:0);
|
||||
// System.out.format("MATCH:%b\n",isMatch);
|
||||
} else {
|
||||
matches += (isMatch?1:0);
|
||||
}
|
||||
coverage++;
|
||||
}
|
||||
|
||||
int mismatches = coverage - matches;
|
||||
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
|
||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
|
||||
if (coverage==0)
|
||||
model[q] = p;
|
||||
else
|
||||
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
|
||||
}
|
||||
this.refDepth = coverage;
|
||||
}
|
||||
|
|
@ -101,6 +139,17 @@ public class ErrorModel {
|
|||
}
|
||||
|
||||
|
||||
@Requires("likelihoods.length>0")
|
||||
private boolean isInformativeElement(double[] likelihoods) {
|
||||
// if likelihoods are the same, they're not informative
|
||||
final double thresh = 0.1;
|
||||
int maxIdx = MathUtils.maxElementIndex(likelihoods);
|
||||
int minIdx = MathUtils.minElementIndex(likelihoods);
|
||||
if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh)
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* Simple constructor that just takes a given log-probability vector as error model.
|
||||
* Only intended for unit testing, not general usage.
|
||||
|
|
@ -115,23 +164,27 @@ public class ErrorModel {
|
|||
|
||||
}
|
||||
|
||||
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) {
|
||||
/* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
|
||||
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) {
|
||||
if (DEBUG)
|
||||
System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
|
||||
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
|
||||
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
|
||||
*/
|
||||
|
||||
//pileupElement.
|
||||
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
|
||||
if (allele.isReference()) {
|
||||
// for a ref allele, any base mismatch or new indel is a mismatch.
|
||||
if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case
|
||||
return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]);
|
||||
if(allele.getBases().length>0)
|
||||
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
|
||||
// could clean up/simplify this when unpadding is removed
|
||||
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
|
||||
else
|
||||
// either null allele to compare, or ref/alt lengths are different (indel by definition).
|
||||
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
|
||||
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
|
||||
}
|
||||
|
||||
// for non-ref alleles to compare:
|
||||
if (refAllele.getBases().length == allele.getBases().length)
|
||||
// alleles have the same length (eg snp or mnp)
|
||||
return pileupElement.getBase() == allele.getBases()[0];
|
||||
|
|
@ -209,18 +262,19 @@ public class ErrorModel {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
String result = "(";
|
||||
StringBuilder result = new StringBuilder("(");
|
||||
boolean skipComma = true;
|
||||
for (double v : probabilityVector.getProbabilityVector()) {
|
||||
if (skipComma) {
|
||||
skipComma = false;
|
||||
}
|
||||
else {
|
||||
result += ",";
|
||||
result.append(",");
|
||||
}
|
||||
result += String.format("%.4f", v);
|
||||
result.append(String.format("%.4f", v));
|
||||
}
|
||||
return result + ")";
|
||||
result.append(")");
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public static int getTotalReferenceDepth(HashMap<String, ErrorModel> perLaneErrorModels) {
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
|
||||
final protected UnifiedArgumentCollection UAC;
|
||||
|
||||
|
|
@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
private final static boolean VERBOSE = false;
|
||||
|
||||
protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
ploidy = UAC.samplePloidy;
|
||||
this.UAC = UAC;
|
||||
|
|
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
for ( final double[] likelihoods : GLs ) {
|
||||
|
||||
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
|
||||
final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
|
||||
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
|
||||
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
|
||||
for (int k=1; k < acCount.length;k++) {
|
||||
if (acCount[k] > 0)
|
||||
|
|
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
return newPool;
|
||||
}
|
||||
|
||||
// todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
|
||||
// todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
|
||||
/**
|
||||
*
|
||||
* @param set ExactACset holding conformation to be computed
|
||||
|
|
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
continue;
|
||||
|
||||
|
||||
PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
|
||||
GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
|
||||
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
|
||||
PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
|
||||
final double[] x = originalPool.getLikelihoodsAsVector(true);
|
||||
while(firstIterator.hasNext()) {
|
||||
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
|
||||
firstIterator.next();
|
||||
}
|
||||
|
||||
PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
final double[] y = yy.clone();
|
||||
while(secondIterator.hasNext()) {
|
||||
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
|
||||
|
|
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// initialize output to -log10(choose(m1+m2,[k1 k2...])
|
||||
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
|
||||
final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
|
||||
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
|
||||
|
||||
|
||||
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
|
||||
|
|
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
|
||||
|
||||
// for current conformation, get all possible ways to break vector K into two components G1 and G2
|
||||
final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
|
||||
while (innerIterator.hasNext()) {
|
||||
// check if breaking current conformation into g1 and g2 is feasible.
|
||||
|
|
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
if ( numOriginalAltAlleles == numNewAltAlleles) {
|
||||
newLikelihoods = originalLikelihoods;
|
||||
} else {
|
||||
newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
|
||||
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
|
||||
|
||||
// might need to re-normalize
|
||||
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
||||
|
|
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
// find the genotype with maximum likelihoods
|
||||
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
|
||||
final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
|
||||
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
|
||||
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
|
||||
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
|
||||
|
|
@ -681,8 +681,8 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
|
||||
// per-pool logging of AC and AF
|
||||
gb.attribute(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
|
||||
gb.attribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
|
||||
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
|
||||
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
|
||||
|
||||
// remove PLs if necessary
|
||||
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)
|
||||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public abstract class PoolGenotypeLikelihoods {
|
||||
public abstract class GeneralPloidyGenotypeLikelihoods {
|
||||
protected final int numChromosomes;
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
|
|
@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
|
||||
private static final boolean FAST_GL_COMPUTATION = true;
|
||||
// constructor with given logPL elements
|
||||
public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
|
||||
public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
|
||||
this.alleles = alleles;
|
||||
this.nAlleles = alleles.size();
|
||||
numChromosomes = ploidy;
|
||||
|
|
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
|
||||
} else {
|
||||
if (logLikelihoods.length != likelihoodDim)
|
||||
throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
|
||||
throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
|
||||
|
||||
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
|
||||
}
|
||||
|
|
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
final int numAlleles = currentState.length;
|
||||
final int ploidy = restrictSumTo;
|
||||
|
||||
linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
|
||||
linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
|
||||
}
|
||||
else
|
||||
throw new ReviewedStingException("BUG: Not supported");
|
||||
|
|
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
|
||||
final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
|
||||
|
||||
int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
|
||||
int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
|
||||
double[] newPLs = new double[newPLSize];
|
||||
|
||||
|
||||
|
|
@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
|
|||
newCount[idx] = pVec[permutationKey[idx]];
|
||||
|
||||
// get corresponding index from new count
|
||||
int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
|
||||
int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
|
||||
newPLs[outputIdx] = pl;
|
||||
if (VERBOSE) {
|
||||
System.out.println("Old Key:"+Arrays.toString(pVec));
|
||||
|
|
@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
|
||||
//protected Set<String> laneIDs;
|
||||
public enum Model {
|
||||
|
|
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
final protected UnifiedArgumentCollection UAC;
|
||||
|
||||
protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC,logger);
|
||||
this.UAC = UAC;
|
||||
|
||||
|
|
@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
|
||||
referenceSampleVC.getAlleles())
|
||||
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
|
||||
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
|
||||
.make();
|
||||
}
|
||||
|
|
@ -138,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
protected static class PoolGenotypeData {
|
||||
|
||||
public final String name;
|
||||
public final PoolGenotypeLikelihoods GL;
|
||||
public final GeneralPloidyGenotypeLikelihoods GL;
|
||||
public final int depth;
|
||||
public final List<Allele> alleles;
|
||||
|
||||
public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
|
||||
public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
|
||||
this.name = name;
|
||||
this.GL = GL;
|
||||
this.depth = depth;
|
||||
|
|
@ -237,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
|
||||
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
|
||||
// actually compute likelihoods
|
||||
final int nGoodBases = GL.add(pileup, UAC);
|
||||
if ( nGoodBases > 0 )
|
||||
|
|
@ -247,7 +246,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
// find the alternate allele(s) that we should be using
|
||||
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
|
||||
|
||||
if (alleles == null || alleles.isEmpty())
|
||||
return null;
|
||||
// start making the VariantContext
|
||||
final GenomeLoc loc = ref.getLocus();
|
||||
final int endLoc = getEndLocation(tracker, ref, alleles);
|
||||
|
|
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
for ( PoolGenotypeData sampleData : GLs ) {
|
||||
// extract from multidimensional array
|
||||
final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
|
||||
final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
|
||||
allAlleles, alleles);
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
|
|
@ -313,7 +313,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
refLanePileup = refPileup.getPileupForLane(laneID);
|
||||
|
||||
//ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles);
|
||||
perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower));
|
||||
perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref));
|
||||
}
|
||||
return perLaneErrorModels;
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
Abstract methods - must be implemented in derived classes
|
||||
*/
|
||||
|
||||
protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
|
|
@ -18,26 +18,30 @@ import java.util.*;
|
|||
* Time: 10:06 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
||||
public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
|
||||
final PairHMMIndelErrorModel pairModel;
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap;
|
||||
final ReferenceContext refContext;
|
||||
final int eventLength;
|
||||
double[][] readHaplotypeLikelihoods;
|
||||
|
||||
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean ignoreLaneInformation,
|
||||
final PairHMMIndelErrorModel pairModel,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext referenceContext) {
|
||||
final byte refBase;
|
||||
|
||||
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean ignoreLaneInformation,
|
||||
final PairHMMIndelErrorModel pairModel,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext referenceContext) {
|
||||
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
||||
this.pairModel = pairModel;
|
||||
this.haplotypeMap = haplotypeMap;
|
||||
this.refContext = referenceContext;
|
||||
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
|
||||
// todo - not needed if indel alleles have base at current position
|
||||
this.refBase = referenceContext.getBase();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
@ -138,10 +142,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
|||
List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
|
||||
|
||||
if (!hasReferenceSampleData) {
|
||||
final int numHaplotypes = haplotypeMap.size();
|
||||
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
||||
n = readHaplotypeLikelihoods.length;
|
||||
} else {
|
||||
Allele refAllele = null;
|
||||
|
|
@ -161,7 +163,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
|||
int idx =0;
|
||||
for (Allele allele : alleles) {
|
||||
int cnt = numSeenBases.get(idx);
|
||||
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0));
|
||||
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0));
|
||||
}
|
||||
|
||||
n++;
|
||||
|
|
@ -32,17 +32,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
|
||||
public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
|
||||
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
|
||||
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
private boolean allelesArePadded = false;
|
||||
/*
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
|
|
@ -60,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
}
|
||||
*/
|
||||
|
||||
protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
|
||||
protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
|
||||
super(UAC, logger);
|
||||
|
||||
|
||||
|
|
@ -70,20 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
}
|
||||
|
||||
|
||||
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
|
||||
return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation){
|
||||
return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
|
||||
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
|
||||
}
|
||||
|
||||
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||
|
|
@ -94,12 +85,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
final List<Allele> allAllelesToUse){
|
||||
|
||||
|
||||
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
||||
List<Allele> alleles = pair.first;
|
||||
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
||||
|
||||
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
||||
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
||||
allelesArePadded = pair.second;
|
||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
|
||||
haplotypeMap.clear();
|
||||
|
|
@ -131,6 +120,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
protected int getEndLocation(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final List<Allele> allelesToUse) {
|
||||
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded);
|
||||
return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -22,7 +23,7 @@ import static java.lang.Math.pow;
|
|||
* and posteriors given a pile of bases and quality scores
|
||||
*
|
||||
*/
|
||||
public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
|
||||
public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
|
||||
|
||||
final List<Allele> myAlleles;
|
||||
final int[] alleleIndices;
|
||||
|
|
@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
|
|||
* @param useBQAedPileup Use BAQed pileup
|
||||
* @param ignoreLaneInformation If true, lane info is ignored
|
||||
*/
|
||||
public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
|
||||
public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
|
||||
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
||||
this.useBAQedPileup = useBQAedPileup;
|
||||
|
||||
myAlleles = new ArrayList<Allele>(alleles);
|
||||
|
||||
refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref!
|
||||
Allele refAllele = alleles.get(0);
|
||||
//sanity check: by construction, first allele should ALWAYS be the reference alleles
|
||||
if (!refAllele.isReference())
|
||||
throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
|
||||
|
||||
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
|
||||
|
||||
if (myAlleles.size() < BaseUtils.BASES.length) {
|
||||
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,
|
||||
|
|
@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
|
||||
public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
|
||||
|
||||
|
||||
protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) {
|
||||
protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
|
||||
}
|
||||
|
||||
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation) {
|
||||
return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
|
||||
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
|
||||
}
|
||||
|
||||
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||
|
|
@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -57,8 +56,12 @@ public class GenotypingEngine {
|
|||
|
||||
// This function is the streamlined approach, currently not being used
|
||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) {
|
||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
|
||||
final ArrayList<Haplotype> haplotypes,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser ) {
|
||||
// Prepare the list of haplotype indices to genotype
|
||||
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
|
||||
|
||||
|
|
@ -184,8 +187,13 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
|
||||
final ArrayList<Haplotype> haplotypes,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
||||
|
||||
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
|
||||
|
||||
|
|
@ -220,7 +228,6 @@ public class GenotypingEngine {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Walk along each position in the key set and create each event to be outputted
|
||||
for( final int loc : startPosKeySet ) {
|
||||
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
|
||||
|
|
@ -317,7 +324,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
|
||||
final int MAX_SIZE_TO_COMBINE = 10;
|
||||
final int MAX_SIZE_TO_COMBINE = 15;
|
||||
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
|
||||
if( startPosKeySet.size() <= 1 ) { return; }
|
||||
|
||||
|
|
@ -334,10 +341,10 @@ public class GenotypingEngine {
|
|||
boolean isBiallelic = true;
|
||||
VariantContext thisVC = null;
|
||||
VariantContext nextVC = null;
|
||||
int x11 = 0;
|
||||
int x12 = 0;
|
||||
int x21 = 0;
|
||||
int x22 = 0;
|
||||
double x11 = Double.NEGATIVE_INFINITY;
|
||||
double x12 = Double.NEGATIVE_INFINITY;
|
||||
double x21 = Double.NEGATIVE_INFINITY;
|
||||
double x22 = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
// only make complex substitutions out of consecutive biallelic sites
|
||||
|
|
@ -360,21 +367,24 @@ public class GenotypingEngine {
|
|||
}
|
||||
}
|
||||
// count up the co-occurrences of the events for the R^2 calculation
|
||||
// BUGBUG: use haplotype likelihoods per-sample to make this more accurate
|
||||
if( thisHapVC == null ) {
|
||||
if( nextHapVC == null ) { x11++; }
|
||||
else { x12++; }
|
||||
} else {
|
||||
if( nextHapVC == null ) { x21++; }
|
||||
else { x22++; }
|
||||
final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
||||
haplotypeList.add(h);
|
||||
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
|
||||
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
|
||||
if( thisHapVC == null ) {
|
||||
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
|
||||
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
|
||||
} else {
|
||||
if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); }
|
||||
else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); }
|
||||
}
|
||||
}
|
||||
}
|
||||
if( thisVC == null || nextVC == null ) {
|
||||
continue;
|
||||
//throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart);
|
||||
}
|
||||
if( isBiallelic ) {
|
||||
final double R2 = calculateR2LD( x11, x12, x21, x22 );
|
||||
final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
|
||||
if( DEBUG ) {
|
||||
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
|
||||
System.out.println("-- " + thisVC);
|
||||
|
|
@ -419,24 +429,21 @@ public class GenotypingEngine {
|
|||
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
|
||||
final int thisStart = thisVC.getStart();
|
||||
final int nextStart = nextVC.getStart();
|
||||
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
|
||||
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
|
||||
byte[] refBases = new byte[]{};
|
||||
byte[] altBases = new byte[]{};
|
||||
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
|
||||
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
|
||||
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||
int locus;
|
||||
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||
final byte refByte = ref[locus - refLoc.getStart()];
|
||||
refBases = ArrayUtils.add(refBases, refByte);
|
||||
altBases = ArrayUtils.add(altBases, refByte);
|
||||
}
|
||||
if( nextVC.hasReferenceBaseForIndel() ) {
|
||||
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
|
||||
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
|
||||
}
|
||||
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
|
||||
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
|
||||
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
|
||||
|
||||
int iii = 0;
|
||||
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
|
||||
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
|
||||
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
|
||||
}
|
||||
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
|
||||
|
|
@ -445,12 +452,11 @@ public class GenotypingEngine {
|
|||
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
|
||||
}
|
||||
|
||||
@Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"})
|
||||
protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) {
|
||||
final int total = x11 + x12 + x21 + x22;
|
||||
final double pa1b1 = ((double) x11) / ((double) total);
|
||||
final double pa1b2 = ((double) x12) / ((double) total);
|
||||
final double pa2b1 = ((double) x21) / ((double) total);
|
||||
protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) {
|
||||
final double total = x11 + x12 + x21 + x22;
|
||||
final double pa1b1 = x11 / total;
|
||||
final double pa1b2 = x12 / total;
|
||||
final double pa2b1 = x21 / total;
|
||||
final double pa1 = pa1b1 + pa1b2;
|
||||
final double pb1 = pa1b1 + pa2b1;
|
||||
return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );
|
||||
|
|
@ -530,35 +536,37 @@ public class GenotypingEngine {
|
|||
final int elementLength = ce.getLength();
|
||||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength );
|
||||
boolean allN = true;
|
||||
for( final byte b : insertionBases ) {
|
||||
if( b != (byte) 'N' ) {
|
||||
allN = false;
|
||||
break;
|
||||
}
|
||||
{
|
||||
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||
final byte refByte = ref[refPos-1];
|
||||
if( BaseUtils.isRegularBase(refByte) ) {
|
||||
insertionAlleles.add( Allele.create(refByte, true) );
|
||||
}
|
||||
if( !allN ) {
|
||||
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
} else {
|
||||
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
|
||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
} else {
|
||||
byte[] insertionBases = new byte[]{};
|
||||
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
||||
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
||||
if( BaseUtils.isAllRegularBases(insertionBases) ) {
|
||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
|
||||
}
|
||||
|
||||
}
|
||||
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
}
|
||||
alignmentPos += elementLength;
|
||||
break;
|
||||
}
|
||||
case S:
|
||||
{
|
||||
alignmentPos += elementLength;
|
||||
break;
|
||||
}
|
||||
case D:
|
||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength );
|
||||
{
|
||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||
// BUGBUG: how often does this symbolic deletion allele case happen?
|
||||
|
|
@ -568,13 +576,20 @@ public class GenotypingEngine {
|
|||
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
|
||||
//} else {
|
||||
final byte refByte = ref[refPos-1];
|
||||
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
|
||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) );
|
||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
|
||||
deletionAlleles.add( Allele.create(refByte, false) );
|
||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||
}
|
||||
//}
|
||||
refPos += elementLength;
|
||||
break;
|
||||
}
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
{
|
||||
int numSinceMismatch = -1;
|
||||
int stopOfMismatch = -1;
|
||||
int startOfMismatch = -1;
|
||||
|
|
@ -597,11 +612,13 @@ public class GenotypingEngine {
|
|||
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
|
||||
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
|
||||
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
|
||||
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
|
||||
snpAlleles.add( Allele.create( refBases, true ) );
|
||||
snpAlleles.add( Allele.create( mismatchBases, false ) );
|
||||
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
|
||||
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
|
||||
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
|
||||
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
|
||||
snpAlleles.add( Allele.create( refBases, true ) );
|
||||
snpAlleles.add( Allele.create( mismatchBases, false ) );
|
||||
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
|
||||
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
|
||||
}
|
||||
numSinceMismatch = -1;
|
||||
stopOfMismatch = -1;
|
||||
startOfMismatch = -1;
|
||||
|
|
@ -611,6 +628,7 @@ public class GenotypingEngine {
|
|||
alignmentPos++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case N:
|
||||
case H:
|
||||
case P:
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
|
|
@ -56,6 +58,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -103,7 +106,7 @@ import java.util.*;
|
|||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=275)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=300)
|
||||
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
|
||||
|
||||
/**
|
||||
|
|
@ -126,9 +129,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
||||
protected int MIN_PRUNE_FACTOR = 1;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
|
||||
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
|
||||
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
|
||||
|
||||
|
|
@ -139,9 +144,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
|
||||
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
|
||||
|
||||
@Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false)
|
||||
protected boolean USE_EXPANDED_TRIGGER_SET = false;
|
||||
|
||||
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
|
||||
protected boolean USE_ALLELES_TRIGGER = false;
|
||||
|
||||
|
|
@ -188,7 +190,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
protected String[] annotationClassesToUse = { "Standard" };
|
||||
|
||||
@ArgumentCollection
|
||||
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
|
||||
|
||||
// the calculation arguments
|
||||
private UnifiedGenotyperEngine UG_engine = null;
|
||||
|
|
@ -239,12 +241,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
samplesList.addAll( samples );
|
||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.STANDARD_CONFIDENCE_FOR_CALLING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
||||
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the output VCF header
|
||||
|
|
@ -308,8 +310,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
public boolean wantsNonPrimaryReads() { return true; }
|
||||
|
||||
@Override
|
||||
@Ensures({"result >= 0.0", "result <= 1.0"})
|
||||
public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
|
||||
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
||||
public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
|
||||
|
||||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
|
||||
|
|
@ -318,56 +320,52 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
}
|
||||
}
|
||||
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
|
||||
return 1.0;
|
||||
return new ActivityProfileResult(1.0);
|
||||
}
|
||||
}
|
||||
|
||||
if( USE_ALLELES_TRIGGER ) {
|
||||
return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
|
||||
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
|
||||
}
|
||||
|
||||
if( context == null ) { return 0.0; }
|
||||
if( context == null ) { return new ActivityProfileResult(0.0); }
|
||||
|
||||
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
|
||||
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
|
||||
for( final String sample : splitContexts.keySet() ) {
|
||||
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
|
||||
for( final Map.Entry<String, AlignmentContext> sample : splitContexts.entrySet() ) {
|
||||
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
|
||||
Arrays.fill(genotypeLikelihoods, 0.0);
|
||||
|
||||
for( final PileupElement p : splitContexts.get(sample).getBasePileup() ) {
|
||||
final byte qual = ( USE_EXPANDED_TRIGGER_SET ?
|
||||
( p.isNextToSoftClip() || p.isBeforeInsertion() || p.isAfterInsertion() ? ( p.getQual() > QualityUtils.MIN_USABLE_Q_SCORE ? p.getQual() : (byte) 20 ) : p.getQual() )
|
||||
: p.getQual() );
|
||||
if( p.isDeletion() || qual > (USE_EXPANDED_TRIGGER_SET ? QualityUtils.MIN_USABLE_Q_SCORE : (byte) 18) ) {
|
||||
for( final PileupElement p : sample.getValue().getBasePileup() ) {
|
||||
final byte qual = p.getQual();
|
||||
if( p.isDeletion() || qual > (byte) 18) {
|
||||
int AA = 0; final int AB = 1; int BB = 2;
|
||||
if( USE_EXPANDED_TRIGGER_SET ) {
|
||||
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ||
|
||||
(!p.getRead().getNGSPlatform().equals(NGSPlatform.SOLID) && ((p.getRead().getReadPairedFlag() && p.getRead().getMateUnmappedFlag()) || BadMateFilter.hasBadMate(p.getRead()))) ) {
|
||||
AA = 2;
|
||||
BB = 0;
|
||||
}
|
||||
} else {
|
||||
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
|
||||
AA = 2;
|
||||
BB = 0;
|
||||
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
|
||||
AA = 2;
|
||||
BB = 0;
|
||||
if( p.isNextToSoftClip() ) {
|
||||
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
|
||||
}
|
||||
}
|
||||
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
|
||||
genotypeLikelihoods[AB] += MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
|
||||
genotypeLikelihoods[BB] += QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
|
||||
genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
|
||||
genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
|
||||
genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
|
||||
}
|
||||
}
|
||||
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
|
||||
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
|
||||
}
|
||||
|
||||
final ArrayList<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add( FAKE_REF_ALLELE );
|
||||
alleles.add( FAKE_ALT_ALLELE );
|
||||
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
|
||||
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
|
||||
|
||||
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -416,45 +414,48 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
|
||||
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
|
||||
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
||||
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() )
|
||||
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() )
|
||||
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
|
||||
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
|
||||
|
||||
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
||||
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
||||
|
||||
// add some custom annotations to the calls
|
||||
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
|
||||
// Calculate the number of variants on the haplotype
|
||||
int maxNumVar = 0;
|
||||
for( final Allele allele : callResult.getFirst().getAlleles() ) {
|
||||
if( !allele.isReference() ) {
|
||||
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
|
||||
final int numVar = haplotype.getEventMap().size();
|
||||
if( numVar > maxNumVar ) { maxNumVar = numVar; }
|
||||
|
||||
if( !GENOTYPE_FULL_ACTIVE_REGION ) {
|
||||
// add some custom annotations to the calls
|
||||
|
||||
// Calculate the number of variants on the haplotype
|
||||
int maxNumVar = 0;
|
||||
for( final Allele allele : callResult.getFirst().getAlleles() ) {
|
||||
if( !allele.isReference() ) {
|
||||
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
|
||||
final int numVar = haplotype.getEventMap().size();
|
||||
if( numVar > maxNumVar ) { maxNumVar = numVar; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Calculate the event length
|
||||
int maxLength = 0;
|
||||
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
|
||||
final int length = a.length() - annotatedCall.getReference().length();
|
||||
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
|
||||
}
|
||||
// Calculate the event length
|
||||
int maxLength = 0;
|
||||
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
|
||||
final int length = a.length() - annotatedCall.getReference().length();
|
||||
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
|
||||
}
|
||||
|
||||
myAttributes.put("NVH", maxNumVar);
|
||||
myAttributes.put("NumHapEval", bestHaplotypes.size());
|
||||
myAttributes.put("NumHapAssembly", haplotypes.size());
|
||||
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
|
||||
myAttributes.put("EVENTLENGTH", maxLength);
|
||||
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
|
||||
myAttributes.put("extType", annotatedCall.getType().toString() );
|
||||
myAttributes.put("NVH", maxNumVar);
|
||||
myAttributes.put("NumHapEval", bestHaplotypes.size());
|
||||
myAttributes.put("NumHapAssembly", haplotypes.size());
|
||||
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
|
||||
myAttributes.put("EVENTLENGTH", maxLength);
|
||||
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
|
||||
myAttributes.put("extType", annotatedCall.getType().toString() );
|
||||
|
||||
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
|
||||
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
|
||||
//}
|
||||
if( annotatedCall.hasAttribute("QD") ) {
|
||||
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
|
||||
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
|
||||
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
|
||||
//}
|
||||
if( annotatedCall.hasAttribute("QD") ) {
|
||||
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
|
||||
}
|
||||
}
|
||||
|
||||
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
|
||||
|
|
@ -493,7 +494,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); }
|
||||
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
|
||||
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
|
||||
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
|
||||
activeRegion.clearReads();
|
||||
|
|
@ -522,7 +523,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
|
||||
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
|
||||
if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
|
||||
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
|
||||
readsToRemove.add(rec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,11 +82,11 @@ public class KBestPaths {
|
|||
}
|
||||
}
|
||||
|
||||
protected static class PathComparatorLowestEdge implements Comparator<Path> {
|
||||
public int compare(final Path path1, final Path path2) {
|
||||
return path2.lowestEdge - path1.lowestEdge;
|
||||
}
|
||||
}
|
||||
//protected static class PathComparatorLowestEdge implements Comparator<Path> {
|
||||
// public int compare(final Path path1, final Path path2) {
|
||||
// return path2.lowestEdge - path1.lowestEdge;
|
||||
// }
|
||||
//}
|
||||
|
||||
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
|
||||
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
|
||||
|
|
@ -99,7 +99,7 @@ public class KBestPaths {
|
|||
}
|
||||
}
|
||||
|
||||
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
|
||||
Collections.sort(bestPaths, new PathComparatorTotalScore() );
|
||||
Collections.reverse(bestPaths);
|
||||
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
|
||||
}
|
||||
|
|
@ -114,8 +114,8 @@ public class KBestPaths {
|
|||
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
|
||||
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
|
||||
// clean out some low scoring paths
|
||||
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
|
||||
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); }
|
||||
Collections.sort(bestPaths, new PathComparatorTotalScore() );
|
||||
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
|
||||
}
|
||||
bestPaths.add(path);
|
||||
} else if( n.val > 10000) {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import com.google.java.contract.Requires;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -50,18 +51,17 @@ public class LikelihoodCalculationEngine {
|
|||
}
|
||||
|
||||
public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
|
||||
final int numHaplotypes = haplotypes.size();
|
||||
|
||||
int X_METRIC_LENGTH = 0;
|
||||
for( final String sample : perSampleReadList.keySet() ) {
|
||||
for( final GATKSAMRecord read : perSampleReadList.get(sample) ) {
|
||||
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
||||
for( final GATKSAMRecord read : sample.getValue() ) {
|
||||
final int readLength = read.getReadLength();
|
||||
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
|
||||
}
|
||||
}
|
||||
int Y_METRIC_LENGTH = 0;
|
||||
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
|
||||
final int haplotypeLength = haplotypes.get(jjj).getBases().length;
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
final int haplotypeLength = h.getBases().length;
|
||||
if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; }
|
||||
}
|
||||
|
||||
|
|
@ -90,8 +90,10 @@ public class LikelihoodCalculationEngine {
|
|||
final int numHaplotypes = haplotypes.size();
|
||||
final int numReads = reads.size();
|
||||
final double[][] readLikelihoods = new double[numHaplotypes][numReads];
|
||||
final int[][] readCounts = new int[numHaplotypes][numReads];
|
||||
for( int iii = 0; iii < numReads; iii++ ) {
|
||||
final GATKSAMRecord read = reads.get(iii);
|
||||
final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
|
||||
|
||||
final byte[] overallGCP = new byte[read.getReadLength()];
|
||||
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
|
||||
|
|
@ -103,7 +105,7 @@ public class LikelihoodCalculationEngine {
|
|||
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
|
||||
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
}
|
||||
|
||||
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
|
||||
|
|
@ -114,10 +116,11 @@ public class LikelihoodCalculationEngine {
|
|||
readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
|
||||
readQuals, readInsQuals, readDelQuals, overallGCP,
|
||||
haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
|
||||
readCounts[jjj][iii] = readCount;
|
||||
}
|
||||
}
|
||||
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
|
||||
haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj] );
|
||||
haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -142,10 +145,20 @@ public class LikelihoodCalculationEngine {
|
|||
}
|
||||
return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
|
||||
}
|
||||
|
||||
|
||||
// This function takes just a single sample and a haplotypeMapping
|
||||
@Requires({"haplotypeMapping.size() > 0"})
|
||||
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
|
||||
public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
|
||||
final TreeSet<String> sampleSet = new TreeSet<String>();
|
||||
sampleSet.add(sample);
|
||||
return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
|
||||
}
|
||||
|
||||
// This function takes a set of samples to pool over and a haplotypeMapping
|
||||
@Requires({"haplotypeMapping.size() > 0"})
|
||||
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
|
||||
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
|
||||
|
||||
final int numHaplotypes = haplotypeMapping.size();
|
||||
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
|
||||
|
|
@ -154,17 +167,21 @@ public class LikelihoodCalculationEngine {
|
|||
}
|
||||
|
||||
// compute the diploid haplotype likelihoods
|
||||
// todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
for( int jjj = 0; jjj <= iii; jjj++ ) {
|
||||
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
|
||||
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
|
||||
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
|
||||
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
|
||||
double haplotypeLikelihood = 0.0;
|
||||
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
|
||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
||||
// First term is approximated by Jacobian log with table lookup.
|
||||
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
|
||||
for( final String sample : samples ) {
|
||||
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
|
||||
final int[] readCounts_iii = iii_mapped.getReadCounts(sample);
|
||||
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
|
||||
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
|
||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
||||
// First term is approximated by Jacobian log with table lookup.
|
||||
haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
|
||||
}
|
||||
}
|
||||
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
|
||||
}
|
||||
|
|
@ -176,48 +193,6 @@ public class LikelihoodCalculationEngine {
|
|||
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
|
||||
}
|
||||
|
||||
@Requires({"haplotypes.size() > 0"})
|
||||
@Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
|
||||
public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final Set<String> samples ) {
|
||||
// set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
|
||||
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
|
||||
list.add(h);
|
||||
haplotypeMapping.add(list);
|
||||
}
|
||||
|
||||
final int numHaplotypes = haplotypeMapping.size();
|
||||
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// compute the diploid haplotype likelihoods
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
for( int jjj = 0; jjj <= iii; jjj++ ) {
|
||||
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
|
||||
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
|
||||
double haplotypeLikelihood = 0.0;
|
||||
for( final String sample : samples ) {
|
||||
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
|
||||
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
|
||||
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
|
||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
||||
// First term is approximated by Jacobian log with table lookup.
|
||||
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
|
||||
}
|
||||
}
|
||||
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// normalize the diploid likelihoods matrix
|
||||
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
|
||||
}
|
||||
|
||||
@Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
|
||||
@Ensures({"result.length == result[0].length", "result.length == likelihoodMatrix.length"})
|
||||
protected static double[][] normalizeDiploidLikelihoodMatrixFromLog10( final double[][] likelihoodMatrix ) {
|
||||
|
|
@ -306,12 +281,19 @@ public class LikelihoodCalculationEngine {
|
|||
final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
|
||||
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
|
||||
bestHaplotypesIndexList.add(0); // always start with the reference haplotype
|
||||
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( haplotypes, sampleKeySet ); // all samples pooled together
|
||||
// set up the default 1-to-1 haplotype mapping object
|
||||
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
|
||||
list.add(h);
|
||||
haplotypeMapping.add(list);
|
||||
}
|
||||
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together
|
||||
|
||||
int hap1 = 0;
|
||||
int hap2 = 0;
|
||||
//double bestElement = Double.NEGATIVE_INFINITY;
|
||||
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
|
||||
final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
|
||||
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
|
||||
double maxElement = Double.NEGATIVE_INFINITY;
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
|
|
@ -343,9 +325,9 @@ public class LikelihoodCalculationEngine {
|
|||
public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
||||
final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
|
||||
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
|
||||
for( final String sample : perSampleReadList.keySet() ) {
|
||||
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
||||
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
|
||||
final ArrayList<GATKSAMRecord> readsForThisSample = perSampleReadList.get(sample);
|
||||
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
|
||||
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
|
||||
final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
|
||||
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
||||
|
|
@ -355,7 +337,7 @@ public class LikelihoodCalculationEngine {
|
|||
for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
|
||||
double maxLikelihood = Double.NEGATIVE_INFINITY;
|
||||
for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
|
||||
final double likelihood = h.getReadLikelihoods(sample)[iii];
|
||||
final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
|
||||
if( likelihood > maxLikelihood ) {
|
||||
maxLikelihood = likelihood;
|
||||
}
|
||||
|
|
@ -390,13 +372,13 @@ public class LikelihoodCalculationEngine {
|
|||
readList = new ArrayList<GATKSAMRecord>();
|
||||
alleleReadMap.put(Allele.NO_CALL, readList);
|
||||
}
|
||||
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample) ) {
|
||||
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
||||
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
||||
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
||||
readList.add(read);
|
||||
}
|
||||
}
|
||||
returnMap.put(sample, alleleReadMap);
|
||||
returnMap.put(sample.getKey(), alleleReadMap);
|
||||
}
|
||||
return returnMap;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
|
|||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -68,7 +69,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
|
||||
}
|
||||
|
||||
private void createDeBruijnGraphs( final ArrayList<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
|
||||
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
|
||||
graphs.clear();
|
||||
|
||||
// create the graph
|
||||
|
|
@ -161,7 +162,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
}
|
||||
}
|
||||
|
||||
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final ArrayList<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
|
||||
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Collection<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
|
||||
final byte[] refSequence = refHaplotype.getBases();
|
||||
if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
|
||||
final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
|
||||
|
|
@ -183,6 +184,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
for( final GATKSAMRecord read : reads ) {
|
||||
final byte[] sequence = read.getReadBases();
|
||||
final byte[] qualities = read.getBaseQualities();
|
||||
final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not readuced
|
||||
if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) {
|
||||
final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
|
||||
for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
|
||||
|
|
@ -194,6 +196,14 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
break;
|
||||
}
|
||||
}
|
||||
int countNumber = 1;
|
||||
if (read.isReducedRead()) {
|
||||
// compute mean number of reduced read counts in current kmer span
|
||||
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
|
||||
// precise rounding can make a difference with low consensus counts
|
||||
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||
}
|
||||
|
||||
if( !badKmer ) {
|
||||
// get the kmers
|
||||
final byte[] kmer1 = new byte[KMER_LENGTH];
|
||||
|
|
@ -201,7 +211,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
final byte[] kmer2 = new byte[KMER_LENGTH];
|
||||
System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);
|
||||
|
||||
addKmersToGraph(graph, kmer1, kmer2, false);
|
||||
for (int k=0; k < countNumber; k++)
|
||||
addKmersToGraph(graph, kmer1, kmer2, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -230,7 +241,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
return true;
|
||||
}
|
||||
|
||||
private void printGraphs() {
|
||||
protected void printGraphs() {
|
||||
int count = 0;
|
||||
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
||||
GRAPH_WRITER.println("digraph kmer" + count++ +" {");
|
||||
|
|
@ -281,7 +292,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
|
||||
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
|
||||
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
|
||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
||||
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
|
||||
|
|
@ -311,7 +322,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
}
|
||||
|
||||
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
|
||||
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart;
|
||||
if( haplotype == null ) { return false; }
|
||||
|
||||
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
|
||||
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );
|
||||
|
|
|
|||
|
|
@ -50,20 +50,21 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -74,10 +75,11 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
Arrays.asList(params.md5));
|
||||
executeTest("testBQSR-"+params.args, spec).getFirst();
|
||||
|
||||
WalkerTestSpec specNT2 = new WalkerTestSpec(
|
||||
params.getCommandLine() + " -nt 2",
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
|
||||
// TODO -- re-enable once parallelization is fixed in BaseRecalibrator
|
||||
//WalkerTestSpec specNT2 = new WalkerTestSpec(
|
||||
// params.getCommandLine() + " -nt 2",
|
||||
// Arrays.asList(params.md5));
|
||||
//executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -94,6 +96,20 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
executeTest("testBQSRFailWithoutDBSNP", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBQSRFailWithSolidNoCall() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
" -T BaseRecalibrator" +
|
||||
" -R " + b36KGReference +
|
||||
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
|
||||
" -L 1:50,000-80,000" +
|
||||
" --no_plots" +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
UserException.class);
|
||||
executeTest("testBQSRFailWithSolidNoCall", spec);
|
||||
}
|
||||
|
||||
private static class PRTest {
|
||||
final String args;
|
||||
final String md5;
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import java.util.Arrays;
|
|||
* Time: 7:44 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PoolAFCalculationModelUnitTest extends BaseTest {
|
||||
public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
|
||||
|
||||
static double[] AA1, AB1, BB1;
|
||||
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
|
||||
|
|
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
|
|||
public void testGLs(GetGLsTest cfg) {
|
||||
|
||||
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
|
||||
final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
|
||||
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
||||
double[] priors = new double[len]; // flat priors
|
||||
|
||||
PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
|
||||
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
|
||||
int nameIndex = 1;
|
||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||
|
|
@ -41,7 +41,7 @@ import java.io.PrintStream;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class PoolGenotypeLikelihoodsUnitTest {
|
||||
public class GeneralPloidyGenotypeLikelihoodsUnitTest {
|
||||
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
final Logger logger = Logger.getLogger(Walker.class);
|
||||
|
|
@ -49,11 +49,18 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
private static final boolean SIMULATE_NOISY_PILEUP = false;
|
||||
private static final int NUM_SIMULATED_OBS = 10;
|
||||
|
||||
void PoolGenotypeLikelihoodsUnitTest() {
|
||||
UAC.minQualityScore = 5;
|
||||
UAC.maxQualityScore = 40;
|
||||
UAC.phredScaledPrior = (byte)20;
|
||||
UAC.minPower = 0.0;
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testStoringLikelihoodElements() {
|
||||
|
||||
|
||||
// basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
|
||||
// basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
|
||||
|
||||
int ploidy = 20;
|
||||
int numAlleles = 4;
|
||||
|
|
@ -71,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
for (int k=0; k < gls.length; k++)
|
||||
gls[k]= (double)k;
|
||||
|
||||
PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
|
||||
GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
|
||||
double[] glnew = gl.getLikelihoods();
|
||||
|
||||
Assert.assertEquals(gls, glnew);
|
||||
|
|
@ -83,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
for (int ploidy = 2; ploidy < 10; ploidy++) {
|
||||
for (int nAlleles = 2; nAlleles < 10; nAlleles++)
|
||||
Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
|
||||
Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
|
||||
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// create iterator, compare linear index given by iterator with closed form function
|
||||
int numAlleles = 4;
|
||||
int ploidy = 2;
|
||||
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
System.out.format("\n%d:",iterator.getLinearIndex());
|
||||
|
|
@ -104,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
System.out.format("%d ",aa);
|
||||
|
||||
|
||||
int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
|
||||
int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
|
||||
System.out.format("Computed idx = %d\n",computedIdx);
|
||||
iterator.next();
|
||||
}
|
||||
|
|
@ -133,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
allelesToSubset.add(Allele.create("A",false));
|
||||
allelesToSubset.add(Allele.create("C",false));
|
||||
|
||||
double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
|
||||
double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
|
||||
originalAlleles, allelesToSubset);
|
||||
|
||||
|
||||
|
|
@ -163,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
@Test
|
||||
public void testIndexIterator() {
|
||||
int[] seed = new int[]{1,2,3,4};
|
||||
PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
|
||||
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
|
||||
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
|
||||
|
||||
|
|
@ -221,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
}
|
||||
|
||||
private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
|
||||
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
|
||||
private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
|
||||
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
int[] a = iterator.getCurrentVector();
|
||||
int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
|
||||
int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
|
||||
if (VERBOSE) {
|
||||
System.out.format("%d:",iterator.getLinearIndex());
|
||||
for (int i=0; i < seed.length; i++)
|
||||
|
|
@ -251,8 +258,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
@Test
|
||||
public void testErrorModel() {
|
||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||
final byte minQ = 5;
|
||||
final byte maxQ = 40;
|
||||
final byte refByte = refPileupTestProvider.getRefByte();
|
||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||
|
|
@ -270,7 +275,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// get artificial alignment context for ref sample - no noise
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30);
|
||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0);
|
||||
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext());
|
||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||
|
||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||
|
|
@ -287,19 +292,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
@Test
|
||||
public void testIndelErrorModel() {
|
||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||
final byte minQ = 5;
|
||||
final byte maxQ = 40;
|
||||
final byte refByte = refPileupTestProvider.getRefByte();
|
||||
final String altBases = "TCA";
|
||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||
final List<Allele> trueAlleles = new ArrayList<Allele>();
|
||||
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
|
||||
trueAlleles.add(Allele.create("TC", false));
|
||||
trueAlleles.add(Allele.create(refByte, true));
|
||||
trueAlleles.add(Allele.create((char)refByte + "TC", false));
|
||||
|
||||
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
|
||||
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
|
||||
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
|
||||
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
|
||||
|
||||
|
||||
final int[] matchArray = {95, 995, 9995, 10000};
|
||||
|
|
@ -311,9 +314,9 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// get artificial alignment context for ref sample - no noise
|
||||
// CASE 1: Test HET insertion
|
||||
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
|
||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0);
|
||||
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
|
||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||
|
||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||
|
|
@ -329,12 +332,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// create deletion VC
|
||||
final int delLength = 4;
|
||||
final List<Allele> delAlleles = new ArrayList<Allele>();
|
||||
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
|
||||
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
|
||||
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
|
||||
delAlleles.add(Allele.create(refByte, false));
|
||||
|
||||
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
|
||||
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
|
||||
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
|
||||
|
||||
for (int matches: matchArray) {
|
||||
for (int mismatches: mismatchArray) {
|
||||
|
|
@ -343,7 +346,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
|
||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0);
|
||||
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext());
|
||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||
|
||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||
|
|
@ -388,8 +391,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
final byte refByte = readPileupTestProvider.getRefByte();
|
||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||
|
||||
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
|
||||
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
|
||||
|
||||
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
|
||||
final Set<String> laneIDs = new TreeSet<String>();
|
||||
|
|
@ -407,17 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
for (String laneID : laneIDs)
|
||||
noisyErrorModels.put(laneID, Q30ErrorModel);
|
||||
|
||||
// all first ref allele
|
||||
allAlleles.add(Allele.create(refByte,true));
|
||||
for (byte b: BaseUtils.BASES) {
|
||||
if (refByte == b)
|
||||
allAlleles.add(Allele.create(b,true));
|
||||
else
|
||||
if (refByte != b)
|
||||
allAlleles.add(Allele.create(b, false));
|
||||
}
|
||||
|
||||
final int refIdx = 0;
|
||||
int altIdx = -1;
|
||||
|
||||
for (int k=0; k < allAlleles.size(); k++)
|
||||
if (altByte == allAlleles.get(k).getBases()[0]) {
|
||||
altIdx = k;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
|
||||
PrintStream out = null;
|
||||
if (SIMULATE_NOISY_PILEUP) {
|
||||
try {
|
||||
out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
|
||||
out = new PrintStream(new File("GLUnitTest.table"));
|
||||
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
|
||||
}
|
||||
catch (Exception e) {}
|
||||
|
|
@ -441,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
// get now likelihoods for this
|
||||
|
||||
final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
|
||||
final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
|
||||
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
if (VERBOSE) {
|
||||
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
|
||||
|
|
@ -470,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
// get now likelihoods for this
|
||||
|
||||
final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
|
||||
final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
|
||||
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
mlPair = noisyGL.getMostLikelyACCount();
|
||||
|
||||
|
|
@ -12,50 +12,66 @@ import org.testng.annotations.Test;
|
|||
* Time: 11:28 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PoolCallerIntegrationTest extends WalkerTest {
|
||||
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
||||
final static String REF = b37KGReference;
|
||||
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
|
||||
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
|
||||
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
|
||||
final String REFSAMPLE_NAME = "NA12878";
|
||||
final String MTINTERVALS = "MT";
|
||||
final String MTINTERVALS = "MT:1-3000";
|
||||
final String LSVINTERVALS = "20:40,000,000-41,000,000";
|
||||
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
||||
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
||||
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
||||
|
||||
private void PC_MT_Test(String bam, String args, String name, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
|
||||
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
|
||||
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
|
||||
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// SNP + ref + SNP = MNP with ref base gap
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||
|
|
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + SNP
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
|
|
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// SNP + insertion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// deletion + SNP
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
|
|
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// SNP + deletion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + deletion = MNP
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + deletion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + insertion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// deletion + deletion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// deletion + insertion (abutting)
|
||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
|
||||
// complex + complex
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
|
||||
|
|
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -20,17 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "882ece8391cfec30ab658970a487d078");
|
||||
HCTest(CEUTRIO_BAM, "", "6b30c7e1b6bbe80d180d9d67441cec12");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "6f458e04251bc4c09b5b544d46f19d68");
|
||||
HCTest(NA12878_BAM, "", "4cdfbfeadef00725974828310558d7d4");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "c172791007f867bf3b975d4194564d9e");
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "6183fb6e374976d7087150009685e043");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -41,8 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "94186811016b332a58c150df556278f8");
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "ab7593a7a60a2e9a66053572f1718df1");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -95,9 +95,10 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
|
|||
ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
|
||||
for( int iii = 1; iii <= 3; iii++) {
|
||||
Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) );
|
||||
int readCount = 1;
|
||||
if( readLikelihood != null ) {
|
||||
Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() );
|
||||
haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood});
|
||||
haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}, new int[]{readCount});
|
||||
haplotypes.add(haplotype);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
*/
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
|
@ -18,6 +20,7 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
||||
|
|
@ -143,6 +146,44 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
|
|||
Assert.assertTrue(graphEquals(graph, expectedGraph));
|
||||
}
|
||||
|
||||
@Test(enabled=false)
|
||||
// not ready yet
|
||||
public void testBasicGraphCreation() {
|
||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||
final byte refBase = refPileupTestProvider.getReferenceContext().getBase();
|
||||
final String altBase = (refBase==(byte)'A'?"C":"A");
|
||||
final int matches = 50;
|
||||
final int mismatches = 50;
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30);
|
||||
PrintStream graphWriter = null;
|
||||
|
||||
try{
|
||||
graphWriter = new PrintStream("du.txt");
|
||||
} catch (Exception e) {}
|
||||
|
||||
|
||||
SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter);
|
||||
final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases());
|
||||
refHaplotype.setIsReference(true);
|
||||
assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype);
|
||||
|
||||
/* // clean up the graphs by pruning and merging
|
||||
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
||||
SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR );
|
||||
//eliminateNonRefPaths( graph );
|
||||
SimpleDeBruijnAssembler.mergeNodes( graph );
|
||||
}
|
||||
*/
|
||||
if( graphWriter != null ) {
|
||||
assembler.printGraphs();
|
||||
}
|
||||
|
||||
int k=2;
|
||||
|
||||
// find the best paths in the graphs
|
||||
// return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
|
||||
|
||||
}
|
||||
@Test(enabled = true)
|
||||
public void testEliminateNonRefPaths() {
|
||||
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||
|
|
|
|||
|
|
@ -1,8 +1,18 @@
|
|||
library("ggplot2")
|
||||
library(gplots)
|
||||
library("reshape")
|
||||
library("grid")
|
||||
library("tools") #For compactPDF in R 2.13+
|
||||
library(gsalib)
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
|
||||
if ( interactive() ) {
|
||||
args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA)
|
||||
} else {
|
||||
args <- commandArgs(TRUE)
|
||||
}
|
||||
data <- read.csv(args[1])
|
||||
gsa.report <- gsa.read.gatkreport(args[2])
|
||||
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
|
||||
|
||||
numRG = length(unique(data$ReadGroup))
|
||||
|
|
@ -82,20 +92,45 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
|
|||
|
||||
p <- ggplot(d, aes(x=CovariateValue)) +
|
||||
xlab(paste(cov,"Covariate")) +
|
||||
ylab("Number of Observations") +
|
||||
ylab("No. of Observations (area normalized)") +
|
||||
blankTheme
|
||||
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
|
||||
scale_y_continuous(formatter="comma")
|
||||
|
||||
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity")
|
||||
d <- d + scale_fill_manual(values=c("maroon1","blue"))
|
||||
d <- d + facet_grid(.~EventType)
|
||||
# d <- d + scale_y_continuous(formatter="comma")
|
||||
}
|
||||
}
|
||||
|
||||
pdf(args[2],height=9,width=15)
|
||||
if ( ! is.na(args[3]) )
|
||||
pdf(args[3],height=9,width=15)
|
||||
|
||||
#frame()
|
||||
textplot(gsa.report$Arguments, show.rownames=F)
|
||||
title(
|
||||
main="GATK BaseRecalibration report",
|
||||
sub=date())
|
||||
|
||||
distributeGraphRows(list(a,b,c), c(1,1,1))
|
||||
distributeGraphRows(list(d,e,f), c(1,1,1))
|
||||
dev.off()
|
||||
|
||||
# format the overall information
|
||||
rt0 <- data.frame(
|
||||
ReadGroup = gsa.report$RecalTable0$ReadGroup,
|
||||
EventType = gsa.report$RecalTable0$EventType,
|
||||
EmpiricalQuality = sprintf("%.1f", gsa.report$RecalTable0$EmpiricalQuality),
|
||||
EstimatedQReported = sprintf("%.1f", gsa.report$RecalTable0$EstimatedQReported),
|
||||
Observations = sprintf("%.2e", gsa.report$RecalTable0$Observations),
|
||||
Errors = sprintf("%.2e", gsa.report$RecalTable0$Errors))
|
||||
textplot(t(rt0), show.colnames=F)
|
||||
title("Overall error rates by event type")
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(args[2])
|
||||
# plot per quality score recalibration table
|
||||
textplot(gsa.report$RecalTable1, show.rownames=F)
|
||||
title("Error rates by event type and initial quality score")
|
||||
|
||||
if ( ! is.na(args[3]) ) {
|
||||
dev.off()
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(args[2])
|
||||
}
|
||||
}
|
||||
|
|
@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
|
|||
|
||||
if ( requestedStrat == "Sample" ) {
|
||||
perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
|
||||
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
|
||||
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)")
|
||||
} else { # by AlleleCount
|
||||
perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
|
||||
perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")
|
||||
|
|
|
|||
|
|
@ -208,6 +208,7 @@ public class FastaSequenceIndexBuilder {
|
|||
break;
|
||||
}
|
||||
}
|
||||
in.close();
|
||||
return sequenceIndex;
|
||||
}
|
||||
catch (IOException e) {
|
||||
|
|
|
|||
|
|
@ -12,10 +12,10 @@ import java.util.*;
|
|||
*/
|
||||
public class Bases implements Iterable<Byte>
|
||||
{
|
||||
public static byte A = 'A';
|
||||
public static byte C = 'C';
|
||||
public static byte G = 'G';
|
||||
public static byte T = 'T';
|
||||
public static final byte A = 'A';
|
||||
public static final byte C = 'C';
|
||||
public static final byte G = 'G';
|
||||
public static final byte T = 'T';
|
||||
|
||||
public static final Bases instance = new Bases();
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.util.List;
|
||||
|
|
@ -147,6 +148,9 @@ public class ArgumentDefinition {
|
|||
this.exclusiveOf = exclusiveOf;
|
||||
this.validation = validation;
|
||||
this.validOptions = validOptions;
|
||||
|
||||
validateName(shortName);
|
||||
validateName(fullName);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -192,6 +196,9 @@ public class ArgumentDefinition {
|
|||
else
|
||||
shortName = null;
|
||||
|
||||
validateName(shortName);
|
||||
validateName(fullName);
|
||||
|
||||
this.ioType = ioType;
|
||||
this.argumentType = argumentType;
|
||||
this.fullName = fullName;
|
||||
|
|
@ -277,4 +284,14 @@ public class ArgumentDefinition {
|
|||
String validation = (String)CommandLineUtils.getValue(annotation, "validation");
|
||||
return validation.trim().length() > 0 ? validation.trim() : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure the argument's name is valid
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
private void validateName(final String name) {
|
||||
if ( name != null && name.startsWith("-") )
|
||||
throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,10 +55,8 @@ public class ArgumentDefinitionGroup implements Iterable<ArgumentDefinition> {
|
|||
* Does the name of this argument group match the name of another?
|
||||
*/
|
||||
public boolean groupNameMatches( ArgumentDefinitionGroup other ) {
|
||||
if( this.groupName == null && other.groupName == null )
|
||||
return true;
|
||||
if( this.groupName == null && other.groupName != null )
|
||||
return false;
|
||||
if( this.groupName == null )
|
||||
return other.groupName == null;
|
||||
return this.groupName.equals(other.groupName);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
/**
|
||||
* our log, which we want to capture anything from org.broadinstitute.sting
|
||||
*/
|
||||
protected static Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
|
||||
protected static final Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
|
||||
|
||||
/**
|
||||
* Fetch the given descriptor from the descriptor repository.
|
||||
|
|
|
|||
|
|
@ -120,8 +120,8 @@ public abstract class ParsingMethod {
|
|||
*/
|
||||
private static final String TAG_TEXT = "[\\w\\-\\.\\=]*";
|
||||
|
||||
public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
|
||||
public static final ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
|
||||
ArgumentDefinitions.FullNameDefinitionMatcher) {};
|
||||
public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
|
||||
public static final ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
|
||||
ArgumentDefinitions.ShortNameDefinitionMatcher) {};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
|
||||
if ( getArgumentCollection().gatkKeyFile == null ) {
|
||||
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
|
||||
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
|
||||
"for more information and instructions on how to obtain a key.");
|
||||
"Please see " + GATKRunReport.PHONE_HOME_DOCS_URL +
|
||||
" for more information and instructions on how to obtain a key.");
|
||||
}
|
||||
else {
|
||||
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();
|
||||
|
|
|
|||
|
|
@ -130,6 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
|
||||
// can't close tribble index when writing
|
||||
if ( message.indexOf("Unable to close index for") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
|
||||
|
||||
// disk is full
|
||||
if ( message.indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getMessage()));
|
||||
if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -233,10 +233,6 @@ public class GenomeAnalysisEngine {
|
|||
if (args.nonDeterministicRandomSeed)
|
||||
resetRandomGenerator(System.currentTimeMillis());
|
||||
|
||||
// TODO -- REMOVE ME WHEN WE STOP BCF testing
|
||||
if ( args.USE_SLOW_GENOTYPES )
|
||||
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
|
||||
|
||||
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
|
||||
if (args.BQSR_RECAL_FILE != null)
|
||||
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
|
||||
|
|
@ -797,6 +793,14 @@ public class GenomeAnalysisEngine {
|
|||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
|
||||
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
|
||||
|
||||
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
|
||||
|
||||
if (argCollection.keepProgramRecords)
|
||||
removeProgramRecords = false;
|
||||
|
||||
return new SAMDataSource(
|
||||
samReaderIDs,
|
||||
threadAllocation,
|
||||
|
|
@ -813,7 +817,8 @@ public class GenomeAnalysisEngine {
|
|||
getWalkerBAQQualityMode(),
|
||||
refReader,
|
||||
getBaseRecalibration(),
|
||||
argCollection.defaultBaseQualities);
|
||||
argCollection.defaultBaseQualities,
|
||||
removeProgramRecords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -840,20 +845,9 @@ public class GenomeAnalysisEngine {
|
|||
SAMSequenceDictionary sequenceDictionary,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
VCFHeader header = null;
|
||||
if ( getArguments().repairVCFHeader != null ) {
|
||||
try {
|
||||
final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader));
|
||||
header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue();
|
||||
pbs.close();
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
|
||||
}
|
||||
}
|
||||
final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType);
|
||||
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType);
|
||||
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
||||
dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
|
||||
builder,
|
||||
|
|
|
|||
|
|
@ -57,8 +57,6 @@ public class GATKArgumentCollection {
|
|||
public GATKArgumentCollection() {
|
||||
}
|
||||
|
||||
public Map<String, String> walkerArgs = new HashMap<String, String>();
|
||||
|
||||
// parameters and their defaults
|
||||
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
|
||||
public List<String> samFiles = new ArrayList<String>();
|
||||
|
|
@ -66,10 +64,10 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
|
||||
public Integer readBufferSize = null;
|
||||
|
||||
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
|
||||
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
|
||||
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
|
||||
|
||||
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
|
||||
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
|
||||
public File gatkKeyFile = null;
|
||||
|
||||
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
|
||||
|
|
@ -249,6 +247,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
||||
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
|
||||
public boolean removeProgramRecords = false;
|
||||
|
||||
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
|
||||
public boolean keepProgramRecords = false;
|
||||
|
||||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
|
||||
public ValidationExclusion.TYPE unsafe;
|
||||
|
||||
|
|
@ -375,19 +379,5 @@ public class GATKArgumentCollection {
|
|||
@Hidden
|
||||
public boolean generateShadowBCF = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
@Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false)
|
||||
@Hidden
|
||||
public boolean USE_SLOW_GENOTYPES = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
/**
|
||||
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
|
||||
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
|
||||
* VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these
|
||||
* to fill in any missing ones in input VCF files.
|
||||
*/
|
||||
@Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false)
|
||||
public File repairVCFHeader = null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Advanced;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 8/20/12
|
||||
* A collection of arguments that are common to the various callers.
|
||||
* This is pulled out so that every caller isn't exposed to the arguments from every other caller.
|
||||
*/
|
||||
|
||||
public class StandardCallerArgumentCollection {
|
||||
/**
|
||||
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
|
||||
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
|
||||
*/
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
|
||||
|
||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
||||
|
||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
||||
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||
|
||||
/**
|
||||
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
|
||||
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
|
||||
* is the default).
|
||||
*/
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
|
||||
/**
|
||||
* This argument allows you to emit low quality calls as filtered records.
|
||||
*/
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
|
||||
/**
|
||||
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
|
||||
*/
|
||||
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
|
||||
public RodBinding<VariantContext> alleles;
|
||||
|
||||
/**
|
||||
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
||||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||
* that you not play around with this parameter.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES = 3;
|
||||
}
|
||||
|
|
@ -118,7 +118,7 @@ class WindowedData {
|
|||
rec.getAlignmentStart(),
|
||||
stop);
|
||||
states = new ArrayList<RMDDataState>();
|
||||
if (provider != null && provider.getReferenceOrderedData() != null)
|
||||
if (provider.getReferenceOrderedData() != null)
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,6 +89,11 @@ public class SAMDataSource {
|
|||
*/
|
||||
private final SAMFileReader.ValidationStringency validationStringency;
|
||||
|
||||
/**
|
||||
* Do we want to remove the program records from this data source?
|
||||
*/
|
||||
private final boolean removeProgramRecords;
|
||||
|
||||
/**
|
||||
* Store BAM indices for each reader present.
|
||||
*/
|
||||
|
|
@ -200,7 +205,8 @@ public class SAMDataSource {
|
|||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1);
|
||||
(byte) -1,
|
||||
false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -233,7 +239,8 @@ public class SAMDataSource {
|
|||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities) {
|
||||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords) {
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
|
||||
|
|
@ -249,6 +256,7 @@ public class SAMDataSource {
|
|||
dispatcher = null;
|
||||
|
||||
validationStringency = strictness;
|
||||
this.removeProgramRecords = removeProgramRecords;
|
||||
if(readBufferSize != null)
|
||||
ReadShard.setReadBufferSize(readBufferSize);
|
||||
else {
|
||||
|
|
@ -748,7 +756,7 @@ public class SAMDataSource {
|
|||
private synchronized void createNewResource() {
|
||||
if(allResources.size() > maxEntries)
|
||||
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
|
||||
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
|
||||
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
|
||||
allResources.add(readers);
|
||||
availableResources.add(readers);
|
||||
}
|
||||
|
|
@ -777,9 +785,11 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Derive a new set of readers from the Reads metadata.
|
||||
* @param readerIDs reads to load.
|
||||
* TODO: validationStringency is not used here
|
||||
* @param validationStringency validation stringency.
|
||||
* @param removeProgramRecords indicate whether to clear program records from the readers
|
||||
*/
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
|
||||
final int totalNumberOfFiles = readerIDs.size();
|
||||
int readerNumber = 1;
|
||||
final SimpleTimer timer = new SimpleTimer().start();
|
||||
|
|
@ -790,6 +800,9 @@ public class SAMDataSource {
|
|||
long lastTick = timer.currentTime();
|
||||
for(final SAMReaderID readerID: readerIDs) {
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID).call();
|
||||
if (removeProgramRecords) {
|
||||
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
||||
}
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@ import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
|
|||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -56,30 +55,31 @@ public class ReferenceDataSource {
|
|||
private IndexedFastaSequenceFile reference;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
|
||||
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
|
||||
|
||||
/**
|
||||
* Create reference data source from fasta file
|
||||
* @param fastaFile Fasta file to be used as reference
|
||||
*/
|
||||
public ReferenceDataSource(File fastaFile) {
|
||||
|
||||
// does the fasta file exist? check that first...
|
||||
if (!fastaFile.exists())
|
||||
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
|
||||
|
||||
File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
|
||||
File dictFile;
|
||||
if (fastaFile.getAbsolutePath().endsWith("fa")) {
|
||||
dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
|
||||
}
|
||||
else
|
||||
dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict"));
|
||||
final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
|
||||
|
||||
final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
|
||||
|
||||
// determine the name for the dict file
|
||||
final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
|
||||
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
|
||||
|
||||
/*
|
||||
if index file does not exist, create it manually
|
||||
*/
|
||||
* if index file does not exist, create it manually
|
||||
*/
|
||||
if (!indexFile.exists()) {
|
||||
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
|
||||
|
||||
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
|
||||
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
|
||||
try {
|
||||
|
|
@ -96,7 +96,7 @@ public class ReferenceDataSource {
|
|||
}
|
||||
catch(UserException e) {
|
||||
// Rethrow all user exceptions as-is; there should be more details in the UserException itself.
|
||||
throw e;
|
||||
throw e;
|
||||
}
|
||||
catch (Exception e) {
|
||||
// If lock creation succeeded, the failure must have been generating the index.
|
||||
|
|
@ -115,6 +115,8 @@ public class ReferenceDataSource {
|
|||
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
|
||||
*/
|
||||
if (!dictFile.exists()) {
|
||||
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
|
||||
|
||||
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
|
||||
|
||||
/*
|
||||
|
|
@ -219,9 +221,9 @@ public class ReferenceDataSource {
|
|||
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
|
||||
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
|
||||
shards.add(new LocusShard(parser,
|
||||
readsDataSource,
|
||||
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
|
||||
null));
|
||||
readsDataSource,
|
||||
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
|
||||
null));
|
||||
}
|
||||
}
|
||||
return shards;
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ import java.util.Collection;
|
|||
|
||||
/** Shards and schedules data in manageable chunks. */
|
||||
public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||
protected static Logger logger = Logger.getLogger(MicroScheduler.class);
|
||||
protected static final Logger logger = Logger.getLogger(MicroScheduler.class);
|
||||
|
||||
/**
|
||||
* Counts the number of instances of the class that are currently alive.
|
||||
|
|
|
|||
|
|
@ -66,13 +66,13 @@ public class TreeReducer implements Callable {
|
|||
* @return Result of the reduce.
|
||||
*/
|
||||
public Object call() {
|
||||
Object result = null;
|
||||
Object result;
|
||||
|
||||
final long startTime = System.currentTimeMillis();
|
||||
|
||||
try {
|
||||
if( lhs == null )
|
||||
result = lhs.get();
|
||||
result = null;
|
||||
// todo -- what the hell is this above line? Shouldn't it be the two below?
|
||||
// if( lhs == null )
|
||||
// throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) );
|
||||
|
|
|
|||
|
|
@ -29,9 +29,19 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Filter out reads with wonky cigar strings.
|
||||
*
|
||||
* - No reads with Hard/Soft clips in the middle of the cigar
|
||||
* - No reads starting with deletions (with or without preceding clips)
|
||||
* - No reads ending in deletions (with or without follow-up clips)
|
||||
* - No reads that are fully hard or soft clipped
|
||||
* - No reads that have consecutive indels in the cigar (II, DD, ID or DI)
|
||||
*
|
||||
* ps: apparently an empty cigar is okay...
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
|
|
@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter {
|
|||
|
||||
public boolean filterOut(final SAMRecord rec) {
|
||||
final Cigar c = rec.getCigar();
|
||||
if( c.isEmpty() ) { return false; } // if there is no Cigar then it can't be bad
|
||||
|
||||
boolean previousElementWasIndel = false;
|
||||
CigarOperator lastOp = c.getCigarElement(0).getOperator();
|
||||
|
||||
if (lastOp == CigarOperator.D) // filter out reads starting with deletion
|
||||
return true;
|
||||
|
||||
for (CigarElement ce : c.getCigarElements()) {
|
||||
CigarOperator op = ce.getOperator();
|
||||
if (op == CigarOperator.D || op == CigarOperator.I) {
|
||||
if (previousElementWasIndel)
|
||||
return true; // filter out reads with adjacent I/D
|
||||
|
||||
previousElementWasIndel = true;
|
||||
}
|
||||
else // this is a regular base (match/mismatch/hard or soft clip)
|
||||
previousElementWasIndel = false; // reset the previous element
|
||||
|
||||
lastOp = op;
|
||||
// if there is no Cigar then it can't be bad
|
||||
if( c.isEmpty() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return lastOp == CigarOperator.D;
|
||||
Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();
|
||||
|
||||
CigarOperator firstOp = CigarOperator.H;
|
||||
while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) {
|
||||
CigarOperator op = elementIterator.next().getOperator();
|
||||
|
||||
// No reads with Hard/Soft clips in the middle of the cigar
|
||||
if (firstOp != CigarOperator.H && op == CigarOperator.H) {
|
||||
return true;
|
||||
}
|
||||
firstOp = op;
|
||||
}
|
||||
|
||||
// No reads starting with deletions (with or without preceding clips)
|
||||
if (firstOp == CigarOperator.D) {
|
||||
return true;
|
||||
}
|
||||
|
||||
boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S);
|
||||
boolean previousElementWasIndel = firstOp == CigarOperator.I;
|
||||
CigarOperator lastOp = firstOp;
|
||||
CigarOperator previousOp = firstOp;
|
||||
|
||||
while (elementIterator.hasNext()) {
|
||||
CigarOperator op = elementIterator.next().getOperator();
|
||||
|
||||
if (op != CigarOperator.S && op != CigarOperator.H) {
|
||||
|
||||
// No reads with Hard/Soft clips in the middle of the cigar
|
||||
if (previousOp == CigarOperator.S || previousOp == CigarOperator.H)
|
||||
return true;
|
||||
|
||||
lastOp = op;
|
||||
|
||||
if (!hasMeaningfulElements && op.consumesReadBases()) {
|
||||
hasMeaningfulElements = true;
|
||||
}
|
||||
|
||||
if (op == CigarOperator.I || op == CigarOperator.D) {
|
||||
|
||||
// No reads that have consecutive indels in the cigar (II, DD, ID or DI)
|
||||
if (previousElementWasIndel) {
|
||||
return true;
|
||||
}
|
||||
previousElementWasIndel = true;
|
||||
}
|
||||
else {
|
||||
previousElementWasIndel = false;
|
||||
}
|
||||
}
|
||||
// No reads with Hard/Soft clips in the middle of the cigar
|
||||
else if (op == CigarOperator.S && previousOp == CigarOperator.H) {
|
||||
return true;
|
||||
}
|
||||
|
||||
previousOp = op;
|
||||
}
|
||||
|
||||
// No reads ending in deletions (with or without follow-up clips)
|
||||
// No reads that are fully hard or soft clipped
|
||||
return lastOp == CigarOperator.D || !hasMeaningfulElements;
|
||||
}
|
||||
}
|
||||
|
|
@ -119,7 +119,7 @@ public class ThreadLocalOutputTracker extends OutputTracker {
|
|||
|
||||
try {
|
||||
tempFile = File.createTempFile( stub.getClass().getName(), null );
|
||||
tempFile.deleteOnExit();
|
||||
//tempFile.deleteOnExit();
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() );
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
if (stub.getGenerateMD5())
|
||||
factory.setCreateMd5File(true);
|
||||
// Adjust max records in RAM.
|
||||
// TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
|
||||
if(stub.getMaxRecordsInRam() != null)
|
||||
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());
|
||||
|
||||
|
|
|
|||
|
|
@ -27,9 +27,10 @@ package org.broadinstitute.sting.gatk.io.storage;
|
|||
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.AbstractFeatureReader;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -60,6 +61,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
protected final File file;
|
||||
protected OutputStream stream;
|
||||
protected final VariantContextWriter writer;
|
||||
boolean closed = false;
|
||||
|
||||
/**
|
||||
* Constructs an object which will write directly into the output file provided by the stub.
|
||||
|
|
@ -81,6 +83,18 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an object which will redirect into a different file.
|
||||
* @param stub Stub to use when synthesizing file / header info.
|
||||
* @param tempFile File into which to direct the output data.
|
||||
*/
|
||||
public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
|
||||
logger.debug("Creating temporary output file " + tempFile.getAbsolutePath() + " for VariantContext output.");
|
||||
this.file = tempFile;
|
||||
this.writer = vcfWriterToFile(stub, file, false);
|
||||
writer.writeHeader(stub.getVCFHeader());
|
||||
}
|
||||
|
||||
/**
|
||||
* common initialization routine for multiple constructors
|
||||
* @param stub Stub to use when constructing the output file.
|
||||
|
|
@ -139,19 +153,6 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructs an object which will redirect into a different file.
|
||||
* @param stub Stub to use when synthesizing file / header info.
|
||||
* @param tempFile File into which to direct the output data.
|
||||
*/
|
||||
public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
|
||||
logger.debug("Creating temporary VCF file " + tempFile.getAbsolutePath() + " for VCF output.");
|
||||
this.file = tempFile;
|
||||
this.writer = vcfWriterToFile(stub, file, false);
|
||||
writer.writeHeader(stub.getVCFHeader());
|
||||
}
|
||||
|
||||
public void add(VariantContext vc) {
|
||||
writer.add(vc);
|
||||
}
|
||||
|
|
@ -172,20 +173,34 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
if(file != null)
|
||||
logger.debug("Closing temporary file " + file.getAbsolutePath());
|
||||
writer.close();
|
||||
closed = true;
|
||||
}
|
||||
|
||||
public void mergeInto(VariantContextWriterStorage target) {
|
||||
try {
|
||||
String sourceFilePath = file.getAbsolutePath();
|
||||
String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
|
||||
logger.debug(String.format("Merging %s into %s",sourceFilePath,targetFilePath));
|
||||
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
if ( ! closed )
|
||||
throw new ReviewedStingException("Writer not closed, but we are merging into the file!");
|
||||
final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
|
||||
logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath));
|
||||
|
||||
// use the feature manager to determine the right codec for the tmp file
|
||||
// that way we don't assume it's a specific type
|
||||
final FeatureManager.FeatureDescriptor fd = new FeatureManager().getByFiletype(file);
|
||||
if ( fd == null )
|
||||
throw new ReviewedStingException("Unexpectedly couldn't find valid codec for temporary output file " + file);
|
||||
|
||||
final FeatureCodec<VariantContext> codec = fd.getCodec();
|
||||
final AbstractFeatureReader<VariantContext> source =
|
||||
AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), codec, false);
|
||||
|
||||
for ( VariantContext vc : source.iterator() ) {
|
||||
for ( final VariantContext vc : source.iterator() ) {
|
||||
target.writer.add(vc);
|
||||
}
|
||||
|
||||
source.close();
|
||||
file.delete(); // this should be last to aid in debugging when the process fails
|
||||
} catch (UserException e) {
|
||||
throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ import java.util.List;
|
|||
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
|
||||
public static final String SITES_ONLY_ARG_NAME = "sites_only";
|
||||
public static final String FORCE_BCF = "bcf";
|
||||
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
|
||||
|
||||
//
|
||||
|
|
@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
|
||||
return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition());
|
||||
return Arrays.asList(
|
||||
createDefaultArgumentDefinition(source),
|
||||
createNoCommandLineHeaderArgumentDefinition(),
|
||||
createSitesOnlyArgumentDefinition(),
|
||||
createBCFArgumentDefinition() );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||
if(!source.isRequired())
|
||||
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
||||
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
|
||||
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
|
||||
engine.addOutput(stub);
|
||||
return stub;
|
||||
}
|
||||
|
|
@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
if(writerFile == null && !source.isRequired())
|
||||
throw new MissingArgumentValueException(defaultArgumentDefinition);
|
||||
|
||||
// Should we compress the output stream?
|
||||
boolean compress = isCompressed(writerFileName);
|
||||
|
||||
boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches);
|
||||
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
|
||||
|
||||
// Create a stub for the given object.
|
||||
VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes)
|
||||
: new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes);
|
||||
final VariantContextWriterStub stub = (writerFile != null)
|
||||
? new VariantContextWriterStub(engine, writerFile, argumentSources)
|
||||
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
|
||||
|
||||
stub.setCompressed(isCompressed(writerFileName));
|
||||
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
|
||||
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
|
||||
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
|
||||
|
||||
// WARNING: Side effects required by engine!
|
||||
parsingEngine.addTags(stub,getArgumentTags(matches));
|
||||
|
|
@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates the optional compression level argument for the BAM file.
|
||||
* @return Argument definition for the BAM file itself. Will not be null.
|
||||
* Creates the optional no_header argument for the VCF file.
|
||||
* @return Argument definition for the VCF file itself. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
|
||||
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
|
||||
|
|
@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates the optional compression level argument for the BAM file.
|
||||
* @return Argument definition for the BAM file itself. Will not be null.
|
||||
* Creates the optional sites_only argument definition
|
||||
* @return Argument definition for the VCF file itself. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
|
||||
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
|
||||
|
|
@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the optional bcf argument definition
|
||||
* @return Argument definition for the VCF file itself. Will not be null.
|
||||
*/
|
||||
private ArgumentDefinition createBCFArgumentDefinition() {
|
||||
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
|
||||
boolean.class,
|
||||
FORCE_BCF,
|
||||
FORCE_BCF,
|
||||
"force BCF output, regardless of the file's extension",
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
true,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the file will be compressed.
|
||||
* @param writerFileName Name of the file
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
|||
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
|
|
@ -78,7 +79,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
/**
|
||||
* Should we emit a compressed output stream?
|
||||
*/
|
||||
private final boolean isCompressed;
|
||||
private boolean isCompressed = false;
|
||||
|
||||
/**
|
||||
* A hack: push the argument sources into the VCF header so that the VCF header
|
||||
|
|
@ -89,12 +90,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
/**
|
||||
* Should the header be written out? A hidden argument.
|
||||
*/
|
||||
private final boolean skipWritingCommandLineHeader;
|
||||
private boolean skipWritingCommandLineHeader = false;
|
||||
|
||||
/**
|
||||
* Should we not write genotypes even when provided?
|
||||
*/
|
||||
private final boolean doNotWriteGenotypes;
|
||||
private boolean doNotWriteGenotypes = false;
|
||||
|
||||
/**
|
||||
* Should we force BCF writing regardless of the file extension?
|
||||
*/
|
||||
private boolean forceBCF = false;
|
||||
|
||||
/**
|
||||
* Connects this stub with an external stream capable of serving the
|
||||
|
|
@ -107,19 +113,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
*
|
||||
* @param engine engine.
|
||||
* @param genotypeFile file to (ultimately) create.
|
||||
* @param isCompressed should we compress the output stream?
|
||||
* @param argumentSources sources.
|
||||
* @param skipWritingCommandLineHeader skip writing header.
|
||||
* @param doNotWriteGenotypes do not write genotypes.
|
||||
*/
|
||||
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
|
||||
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection<Object> argumentSources) {
|
||||
this.engine = engine;
|
||||
this.genotypeFile = genotypeFile;
|
||||
this.genotypeStream = null;
|
||||
this.isCompressed = isCompressed;
|
||||
this.argumentSources = argumentSources;
|
||||
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -127,19 +127,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
*
|
||||
* @param engine engine.
|
||||
* @param genotypeStream stream to (ultimately) write.
|
||||
* @param isCompressed should we compress the output stream?
|
||||
* @param argumentSources sources.
|
||||
* @param skipWritingCommandLineHeader skip writing header.
|
||||
* @param doNotWriteGenotypes do not write genotypes.
|
||||
*/
|
||||
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
|
||||
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection<Object> argumentSources) {
|
||||
this.engine = engine;
|
||||
this.genotypeFile = null;
|
||||
this.genotypeStream = new PrintStream(genotypeStream);
|
||||
this.isCompressed = isCompressed;
|
||||
this.argumentSources = argumentSources;
|
||||
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -166,6 +160,22 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
return isCompressed;
|
||||
}
|
||||
|
||||
public void setCompressed(boolean compressed) {
|
||||
isCompressed = compressed;
|
||||
}
|
||||
|
||||
public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) {
|
||||
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
|
||||
}
|
||||
|
||||
public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) {
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
}
|
||||
|
||||
public void setForceBCF(boolean forceBCF) {
|
||||
this.forceBCF = forceBCF;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the master sequence dictionary from the engine associated with this stub
|
||||
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||
|
|
@ -186,6 +196,9 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
|
||||
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
|
||||
|
||||
if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
|
||||
options.add(Options.FORCE_BCF);
|
||||
|
||||
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
return stepForwardOnGenome();
|
||||
} else {
|
||||
if (curElement != null && curElement.getOperator() == CigarOperator.D)
|
||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
|
||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
|
||||
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
|
||||
// we fall into this else block only when indels end the read, increment genomeOffset such that the
|
||||
|
|
@ -185,7 +185,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
break;
|
||||
case D: // deletion w.r.t. the reference
|
||||
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
|
||||
throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
// should be the same as N case
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
|
|
@ -195,6 +195,8 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
done = true;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
readOffset++;
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
|
|
@ -279,7 +281,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
*/
|
||||
private void lazyLoadNextAlignmentContext() {
|
||||
while (nextAlignmentContext == null && readStates.hasNext()) {
|
||||
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
||||
readStates.collectPendingReads();
|
||||
|
||||
final GenomeLoc location = getLocation();
|
||||
|
|
@ -378,7 +379,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
|
||||
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
it.remove(); // we've stepped off the end of the object
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,13 +86,14 @@ public class GATKRunReport {
|
|||
private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
|
||||
|
||||
// number of milliseconds before the S3 put operation is timed-out:
|
||||
private static final long S3PutTimeOut = 30 * 1000;
|
||||
private static final long S3PutTimeOut = 10 * 1000;
|
||||
|
||||
public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest";
|
||||
|
||||
/**
|
||||
* our log
|
||||
*/
|
||||
protected static Logger logger = Logger.getLogger(GATKRunReport.class);
|
||||
protected static final Logger logger = Logger.getLogger(GATKRunReport.class);
|
||||
|
||||
|
||||
@Element(required = false, name = "id")
|
||||
|
|
|
|||
|
|
@ -163,43 +163,58 @@ public class VariantContextAdaptors {
|
|||
@Override
|
||||
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
|
||||
|
||||
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
|
||||
// add all of the alt alleles
|
||||
boolean sawNullAllele = refAllele.isNull();
|
||||
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
}
|
||||
Allele altAllele = Allele.create(alt, false);
|
||||
alleles.add(altAllele);
|
||||
if ( altAllele.isNull() )
|
||||
sawNullAllele = true;
|
||||
}
|
||||
final byte refBaseForIndel = ref.getBases()[index];
|
||||
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
builder.source(name).id(dbsnp.getRsID());
|
||||
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
|
||||
builder.alleles(alleles);
|
||||
builder.referenceBaseForIndel(refBaseForIndel);
|
||||
return builder.make();
|
||||
} else
|
||||
boolean addPaddingBase;
|
||||
if ( isSNP(dbsnp) || isMNP(dbsnp) )
|
||||
addPaddingBase = false;
|
||||
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
|
||||
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||
else
|
||||
return null; // can't handle anything else
|
||||
|
||||
Allele refAllele;
|
||||
if ( dbsnp.getNCBIRefBase().equals("-") )
|
||||
refAllele = Allele.create(refBaseForIndel, true);
|
||||
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||
return null;
|
||||
else
|
||||
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
// add all of the alt alleles
|
||||
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||
if ( Allele.wouldBeNullAllele(alt.getBytes()))
|
||||
alt = "";
|
||||
else if ( ! Allele.acceptableAlleleBases(alt) )
|
||||
return null;
|
||||
|
||||
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
|
||||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
builder.source(name).id(dbsnp.getRsID());
|
||||
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
|
||||
builder.alleles(alleles);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
private static List<String> stripNullDashes(final List<String> alleles) {
|
||||
final List<String> newAlleles = new ArrayList<String>(alleles.size());
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.equals("-") )
|
||||
newAlleles.add("");
|
||||
else
|
||||
newAlleles.add(allele);
|
||||
}
|
||||
return newAlleles;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -294,7 +309,6 @@ public class VariantContextAdaptors {
|
|||
int index = hapmap.getStart() - ref.getWindow().getStart();
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||
Allele refSNPAllele = Allele.create(ref.getBase(), true);
|
||||
|
|
@ -351,7 +365,7 @@ public class VariantContextAdaptors {
|
|||
long end = hapmap.getEnd();
|
||||
if ( deletionLength > 0 )
|
||||
end += deletionLength;
|
||||
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
|
||||
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,18 +85,16 @@ public class FeatureManager {
|
|||
|
||||
private final PluginManager<FeatureCodec> pluginManager;
|
||||
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
|
||||
private final VCFHeader headerForRepairs;
|
||||
private final boolean lenientVCFProcessing;
|
||||
|
||||
/**
|
||||
* Construct a FeatureManager without a master VCF header
|
||||
*/
|
||||
public FeatureManager() {
|
||||
this(null, false);
|
||||
this(false);
|
||||
}
|
||||
|
||||
public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) {
|
||||
this.headerForRepairs = headerForRepairs;
|
||||
public FeatureManager(final boolean lenientVCFProcessing) {
|
||||
this.lenientVCFProcessing = lenientVCFProcessing;
|
||||
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
|
||||
|
||||
|
|
@ -255,8 +253,6 @@ public class FeatureManager {
|
|||
((NameAwareCodec)codex).setName(name);
|
||||
if ( codex instanceof ReferenceDependentFeatureCodec )
|
||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||
if ( codex instanceof VCFCodec )
|
||||
((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
|
||||
if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
|
||||
((AbstractVCFCodec)codex).disableOnTheFlyModifications();
|
||||
|
||||
|
|
|
|||
|
|
@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
* please talk through your approach with the SE team.
|
||||
* @param dict Sequence dictionary to use.
|
||||
* @param genomeLocParser Location parser to use.
|
||||
* @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null
|
||||
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
|
||||
*/
|
||||
public RMDTrackBuilder(final SAMSequenceDictionary dict,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final VCFHeader headerForRepairs,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
this.dict = dict;
|
||||
this.validationExclusionType = validationExclusionType;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
|
||||
this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
return featureManager;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as full constructor but makes one without a header for repairs
|
||||
* @param dict
|
||||
* @param genomeLocParser
|
||||
* @param validationExclusionType
|
||||
*/
|
||||
public RMDTrackBuilder(final SAMSequenceDictionary dict,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
this(dict, genomeLocParser, null, validationExclusionType);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a RMDTrack of the specified type
|
||||
*
|
||||
|
|
|
|||
|
|
@ -89,9 +89,9 @@ public class GATKReport {
|
|||
reader = new BufferedReader(new FileReader(file));
|
||||
reportHeader = reader.readLine();
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Could not open file : " + file);
|
||||
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
|
||||
} catch (IOException e) {
|
||||
throw new ReviewedStingException("Could not read file : " + file);
|
||||
throw new UserException.CouldNotReadInputFile(file, e);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -208,11 +208,23 @@ public class GATKReportTable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||
*
|
||||
* @param name the name of the table or column
|
||||
* @return true if the name is valid, false if otherwise
|
||||
* Create a new GATKReportTable with the same structure
|
||||
* @param tableToCopy
|
||||
*/
|
||||
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
|
||||
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID);
|
||||
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
|
||||
addColumn(column.getColumnName(), column.getFormat());
|
||||
if ( copyData )
|
||||
throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported");
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||
*
|
||||
* @param name the name of the table or column
|
||||
* @return true if the name is valid, false if otherwise
|
||||
*/
|
||||
private boolean isValidName(String name) {
|
||||
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
|
||||
Matcher m = p.matcher(name);
|
||||
|
|
@ -490,6 +502,17 @@ public class GATKReportTable {
|
|||
return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a value from the given position in the table
|
||||
*
|
||||
* @param rowIndex the row ID
|
||||
* @param columnName the name of the column
|
||||
* @return the value stored at the specified position in the table
|
||||
*/
|
||||
public Object get(final int rowIndex, final String columnName) {
|
||||
return get(rowIndex, columnNameToIndex.get(columnName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a value from the given position in the table
|
||||
*
|
||||
|
|
|
|||
|
|
@ -62,54 +62,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple utility class that makes it convenient to print unit adjusted times
|
||||
*/
|
||||
private static class MyTime {
|
||||
double t; // in Seconds
|
||||
int precision; // for format
|
||||
|
||||
public MyTime(double t, int precision) {
|
||||
this.t = t;
|
||||
this.precision = precision;
|
||||
}
|
||||
|
||||
public MyTime(double t) {
|
||||
this(t, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Instead of 10000 s, returns 2.8 hours
|
||||
* @return
|
||||
*/
|
||||
public String toString() {
|
||||
double unitTime = t;
|
||||
String unit = "s";
|
||||
|
||||
if ( t > 120 ) {
|
||||
unitTime = t / 60; // minutes
|
||||
unit = "m";
|
||||
|
||||
if ( unitTime > 120 ) {
|
||||
unitTime /= 60; // hours
|
||||
unit = "h";
|
||||
|
||||
if ( unitTime > 100 ) {
|
||||
unitTime /= 24; // days
|
||||
unit = "d";
|
||||
|
||||
if ( unitTime > 20 ) {
|
||||
unitTime /= 7; // days
|
||||
unit = "w";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return String.format("%6."+precision+"f %s", unitTime, unit);
|
||||
}
|
||||
}
|
||||
|
||||
/** lock object to sure updates to history are consistent across threads */
|
||||
private static final Object lock = new Object();
|
||||
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
|
||||
|
|
@ -140,7 +92,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
GenomeLocSortedSet targetIntervals = null;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
protected GenomeAnalysisEngine engine;
|
||||
|
||||
|
|
@ -280,20 +232,20 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
|
||||
ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
|
||||
|
||||
final MyTime elapsed = new MyTime(last.elapsedSeconds);
|
||||
final MyTime bpRate = new MyTime(secondsPerMillionBP(last));
|
||||
final MyTime unitRate = new MyTime(secondsPerMillionElements(last));
|
||||
final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds);
|
||||
final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last));
|
||||
final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last));
|
||||
final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
|
||||
final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted);
|
||||
final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t);
|
||||
final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted);
|
||||
final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds());
|
||||
|
||||
if ( printProgress ) {
|
||||
lastProgressPrintTime = curTime;
|
||||
|
||||
// dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
|
||||
if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS )
|
||||
if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS )
|
||||
PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
|
||||
else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS )
|
||||
else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS )
|
||||
PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
|
||||
else
|
||||
PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
||||
|
|
@ -308,8 +260,9 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
lastPerformanceLogPrintTime = curTime;
|
||||
synchronized(performanceLogLock) {
|
||||
performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n",
|
||||
elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t,
|
||||
fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t);
|
||||
elapsed.getTimeInSeconds(), nRecords, unitRate.getTimeInSeconds(), last.bpProcessed,
|
||||
bpRate.getTimeInSeconds(), fractionGenomeTargetCompleted, estTotalRuntime.getTimeInSeconds(),
|
||||
timeToCompletion.getTimeInSeconds());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -401,7 +354,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
|
||||
synchronized(performanceLogLock) {
|
||||
// Ignore multiple calls to reset the same lock.
|
||||
if(performanceLogFile != null && performanceLogFile.equals(fileName))
|
||||
if(performanceLogFile != null && performanceLogFile.equals(file))
|
||||
return;
|
||||
|
||||
// Close an existing log
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -28,7 +29,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>();
|
||||
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
|
||||
|
|
@ -69,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
|
||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
||||
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
|
||||
profile.add(fakeLoc, isActiveProb);
|
||||
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -86,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
|
||||
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
|
||||
profile.add(location, isActiveProb);
|
||||
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
}
|
||||
|
||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||
|
|
@ -144,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
|
||||
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
|||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
@Override
|
||||
protected String getTraversalType() {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ import java.util.List;
|
|||
public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<M,T>,ReadShardDataProvider> {
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(TraverseReadPairs.class);
|
||||
protected static final Logger logger = Logger.getLogger(TraverseReadPairs.class);
|
||||
|
||||
@Override
|
||||
protected String getTraversalType() {
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
*/
|
||||
public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(TraverseReads.class);
|
||||
protected static final Logger logger = Logger.getLogger(TraverseReads.class);
|
||||
|
||||
@Override
|
||||
protected String getTraversalType() {
|
||||
|
|
@ -75,8 +75,6 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
if( !dataProvider.hasReads() )
|
||||
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
||||
|
||||
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
|
||||
|
||||
ReadView reads = new ReadView(dataProvider);
|
||||
ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||
|
||||
|
|
@ -91,7 +89,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
ReferenceContext refContext = null;
|
||||
|
||||
// get the array of characters for the reference sequence, since we're a mapped read
|
||||
if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference())
|
||||
if (!read.getReadUnmappedFlag() && dataProvider.hasReference())
|
||||
refContext = reference.getReferenceContext(read);
|
||||
|
||||
// update the number of reads we've seen
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
|
@ -27,10 +28,11 @@ import java.util.List;
|
|||
*/
|
||||
|
||||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
@ActiveRegionExtension(extension=50,maxRegion=1500)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
||||
@RemoveProgramRecords
|
||||
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
|
||||
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
|
||||
|
|
@ -72,7 +74,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
}
|
||||
|
||||
// Determine probability of active status over the AlignmentContext
|
||||
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
|
||||
// Map over the ActiveRegion
|
||||
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||
|
|
|
|||
|
|
@ -574,7 +574,7 @@ public class ClipReads extends ReadWalker<ClipReads.ReadClipperWithData, ClipRea
|
|||
}
|
||||
}
|
||||
|
||||
public class ReadClipperWithData extends ReadClipper {
|
||||
public static class ReadClipperWithData extends ReadClipper {
|
||||
private ClippingData data;
|
||||
|
||||
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
|
||||
|
|
|
|||
|
|
@ -16,8 +16,18 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
* Allow user to choose between a number of different data sources.
|
||||
*/
|
||||
public enum DataSource {
|
||||
/**
|
||||
* Does this walker require read (BAM) data to work?
|
||||
*/
|
||||
READS,
|
||||
|
||||
/**
|
||||
* Does this walker require reference data to work?
|
||||
*/
|
||||
REFERENCE,
|
||||
REFERENCE_BASES, // Do I actually need the reference bases passed to the walker?
|
||||
|
||||
/**
|
||||
* Does this walker require reference order data (VCF) to work?
|
||||
*/
|
||||
REFERENCE_ORDERED_DATA
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,9 +16,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE})
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
@RemoveProgramRecords
|
||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -64,9 +64,17 @@ import java.util.List;
|
|||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
|
||||
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
/**
|
||||
* In addition to the standard pileup output, adds 'verbose' output too. The verbose output contains the number of spanning deletions,
|
||||
* and for each read in the pileup it has the read name, offset in the base string, read length, and read mapping quality. These per
|
||||
* read items are delimited with an '@' character.
|
||||
*/
|
||||
@Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output")
|
||||
public boolean SHOW_VERBOSE = false;
|
||||
|
||||
|
|
@ -116,8 +124,6 @@ public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducib
|
|||
return rodString;
|
||||
}
|
||||
|
||||
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
|
||||
|
||||
private static String createVerboseOutput(final ReadBackedPileup pileup) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
boolean isFirst = true;
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
* Time: 2:52:28 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
public boolean requiresOrderedReads() { return false; }
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.REFERENCE})
|
||||
@Allows(DataSource.REFERENCE)
|
||||
public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 8/2/12
|
||||
* Time: 1:58 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* Indicates that program records should be removed from SAM headers by default for this walker
|
||||
*/
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface RemoveProgramRecords {
|
||||
}
|
||||
|
|
@ -65,12 +65,12 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
|
|||
// by design, first element in LinkedHashMap was ref allele
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Allele a : el.keySet()) {
|
||||
for (Map.Entry<Allele, Double> entry : el.entrySet()) {
|
||||
|
||||
if (a.isReference())
|
||||
refLikelihood =el.get(a);
|
||||
if (entry.getKey().isReference())
|
||||
refLikelihood = entry.getValue();
|
||||
else {
|
||||
double like = el.get(a);
|
||||
double like = entry.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,19 +22,10 @@ import java.util.Map;
|
|||
/**
|
||||
* Total (unfiltered) depth over all samples.
|
||||
*
|
||||
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
|
||||
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
|
||||
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
|
||||
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||
* normally be excluded from the statistical calculations going into GQ and QUAL.
|
||||
*
|
||||
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
|
||||
* -dcov D is N * D
|
||||
* While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
|
||||
* internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
|
||||
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
|
||||
* N samples with -dcov D is N * D
|
||||
*/
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@ import java.util.List;
|
|||
/**
|
||||
* The depth of coverage of each VCF allele in this sample.
|
||||
*
|
||||
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
|
||||
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
|
||||
* The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this
|
||||
* sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the
|
||||
* Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of
|
||||
* REF and ALT fields) is the unfiltered count of all reads that carried with them the
|
||||
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||
|
|
@ -35,17 +35,13 @@ import java.util.List;
|
|||
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
|
||||
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
|
||||
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
|
||||
* are unambiguously informative about the alternate allele). Because of this fact and
|
||||
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
|
||||
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
|
||||
* determine the genotype calls (see below).
|
||||
* determine the genotype calls.
|
||||
*/
|
||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||
|
||||
private static final String REF_ALLELE = "REF";
|
||||
|
||||
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return;
|
||||
|
|
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
if ( vc.isSNP() )
|
||||
annotateSNP(stratifiedContext, vc, gb);
|
||||
else if ( vc.isIndel() )
|
||||
annotateIndel(stratifiedContext, vc, gb);
|
||||
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
|
||||
}
|
||||
|
||||
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
|
||||
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
||||
for ( Allele allele : vc.getAlleles() )
|
||||
|
|
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
if ( pileup == null )
|
||||
return;
|
||||
|
||||
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
|
||||
alleleCounts.put(REF_ALLELE, 0);
|
||||
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
|
||||
final Allele refAllele = vc.getReference();
|
||||
|
||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
||||
|
||||
if ( allele.isNoCall() ) {
|
||||
continue; // this does not look so good, should we die???
|
||||
}
|
||||
|
||||
alleleCounts.put(getAlleleRepresentation(allele), 0);
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
alleleCounts.put(allele, 0);
|
||||
}
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.isBeforeInsertion() ) {
|
||||
|
||||
final String b = p.getEventBases();
|
||||
if ( alleleCounts.containsKey(b) ) {
|
||||
alleleCounts.put(b, alleleCounts.get(b)+1);
|
||||
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
|
||||
if ( alleleCounts.containsKey(insertion) ) {
|
||||
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
|
||||
}
|
||||
|
||||
} else if ( p.isBeforeDeletionStart() ) {
|
||||
if ( p.getEventLength() == refAllele.length() ) {
|
||||
// this is indeed the deletion allele recorded in VC
|
||||
final String b = DEL;
|
||||
if ( alleleCounts.containsKey(b) ) {
|
||||
alleleCounts.put(b, alleleCounts.get(b)+1);
|
||||
}
|
||||
if ( p.getEventLength() == refAllele.length() - 1 ) {
|
||||
// this is indeed the deletion allele recorded in VC
|
||||
final Allele deletion = Allele.create(refBase);
|
||||
if ( alleleCounts.containsKey(deletion) ) {
|
||||
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
|
||||
}
|
||||
}
|
||||
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
|
||||
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
|
||||
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
|
||||
}
|
||||
}
|
||||
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(REF_ALLELE);
|
||||
final int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(refAllele);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
||||
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
|
||||
|
||||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private String getAlleleRepresentation(Allele allele) {
|
||||
if ( allele.isNull() ) { // deletion wrt the ref
|
||||
return DEL;
|
||||
} else { // insertion, pass actual bases
|
||||
return allele.getBaseString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// public String getIndelBases()
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
||||
|
||||
|
|
|
|||
|
|
@ -291,8 +291,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
int[][] table = new int[2][2];
|
||||
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(sample);
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
final AlignmentContext context = sample.getValue();
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
|
|
@ -313,12 +313,12 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Allele a : el.keySet()) {
|
||||
for (Map.Entry<Allele,Double> entry : el.entrySet()) {
|
||||
|
||||
if (a.isReference())
|
||||
refLikelihood =el.get(a);
|
||||
if (entry.getKey().isReference())
|
||||
refLikelihood = entry.getValue();
|
||||
else {
|
||||
double like = el.get(a);
|
||||
double like = entry.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
return map;
|
||||
}
|
||||
|
||||
private class HaplotypeComparator implements Comparator<Haplotype> {
|
||||
private static class HaplotypeComparator implements Comparator<Haplotype> {
|
||||
|
||||
public int compare(Haplotype a, Haplotype b) {
|
||||
if (a.getQualitySum() < b.getQualitySum())
|
||||
|
|
@ -362,8 +362,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
// Score all the reads in the pileup, even the filtered ones
|
||||
final double[] scores = new double[el.size()];
|
||||
int i = 0;
|
||||
for (Allele a : el.keySet()) {
|
||||
scores[i++] = -el.get(a);
|
||||
for (Map.Entry<Allele, Double> a : el.entrySet()) {
|
||||
scores[i++] = -a.getValue();
|
||||
if (DEBUG) {
|
||||
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,12 +61,12 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
|
|||
// by design, first element in LinkedHashMap was ref allele
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Allele a : el.keySet()) {
|
||||
for (Map.Entry<Allele,Double> a : el.entrySet()) {
|
||||
|
||||
if (a.isReference())
|
||||
refLikelihood =el.get(a);
|
||||
if (a.getKey().isReference())
|
||||
refLikelihood = a.getValue();
|
||||
else {
|
||||
double like = el.get(a);
|
||||
double like = a.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,11 +87,11 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
|||
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read
|
||||
double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele
|
||||
|
||||
for (Allele a : el.keySet()) {
|
||||
if (a.isReference())
|
||||
refLikelihood = el.get(a);
|
||||
for (Map.Entry<Allele,Double> a : el.entrySet()) {
|
||||
if (a.getKey().isReference())
|
||||
refLikelihood = a.getValue();
|
||||
else {
|
||||
double like = el.get(a);
|
||||
double like = a.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
|
|
@ -100,7 +100,6 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
|||
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
|
||||
final int numAlignedBases = getNumAlignedBases(p.getRead());
|
||||
|
||||
int rp = readPos;
|
||||
if (readPos > numAlignedBases / 2) {
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,8 +66,8 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
return map;
|
||||
}
|
||||
|
||||
public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
|
||||
public static final VCFInfoHeaderLine[] descriptions = {
|
||||
protected static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
|
||||
protected static final VCFInfoHeaderLine[] descriptions = {
|
||||
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
|
||||
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
|
||||
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -142,9 +141,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle"));
|
||||
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle"));
|
||||
|
||||
// Open output file specified by output VCF ROD
|
||||
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||
|
||||
if ( comp.isBound() ) {
|
||||
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
|
||||
|
|
@ -250,8 +246,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
|
||||
String refString = vc_input.getReference().getDisplayString();
|
||||
if (refString.length() == 0) // ref was null
|
||||
refString = Allele.NULL_ALLELE_STRING;
|
||||
|
||||
Allele bglAlleleA, bglAlleleB;
|
||||
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
|
|||
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
|
||||
for ( Allele allele : preferredVC.getAlleles() ) {
|
||||
String bglPrintString;
|
||||
if (allele.isNoCall() || allele.isNull())
|
||||
if (allele.isNoCall())
|
||||
bglPrintString = "-";
|
||||
else
|
||||
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
|
||||
|
|
@ -351,7 +351,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public static class CachingFormatter {
|
||||
private int maxCacheSize = 0;
|
||||
private String format;
|
||||
private LRUCache<Double, String> cache;
|
||||
|
||||
|
|
@ -379,7 +378,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public CachingFormatter(String format, int maxCacheSize) {
|
||||
this.maxCacheSize = maxCacheSize;
|
||||
this.format = format;
|
||||
this.cache = new LRUCache<Double, String>(maxCacheSize);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
|
|||
|
||||
// write out the alleles at this site
|
||||
for ( Allele allele : vc.getAlleles() ) {
|
||||
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" ");
|
||||
beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
|
||||
}
|
||||
|
||||
// write out sample level genotypes
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.commandline.Gatherer;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer {
|
|||
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
|
||||
final File recal_out = new File(output.getName() + ".original");
|
||||
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else if (!RAC.NO_PLOTS) {
|
||||
final File recal_out = new File(output.getName() + ".recal");
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
|
||||
generalReport.output(outputFile);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
|||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -103,13 +107,13 @@ import java.util.ArrayList;
|
|||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@By(DataSource.READS)
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) // filter out all reads with zero or unavailable mapping quality
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
|
||||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
|
|
@ -132,6 +136,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
// TODO -- remove me after the 2.1 release
|
||||
if ( getToolkit().getArguments().numberOfThreads > 1 )
|
||||
throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons. We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience.");
|
||||
|
||||
// check for unsupported access
|
||||
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
|
||||
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
|
||||
|
|
@ -143,12 +151,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
|
||||
|
||||
if (RAC.LIST_ONLY) {
|
||||
RecalDataManager.listAvailableCovariates(logger);
|
||||
RecalUtils.listAvailableCovariates(logger);
|
||||
System.exit(0);
|
||||
}
|
||||
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
|
||||
|
|
@ -222,17 +230,17 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
|
||||
if (readNotSeen(read)) {
|
||||
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
|
||||
RecalDataManager.parsePlatformForRead(read, RAC);
|
||||
if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
|
||||
RecalUtils.parsePlatformForRead(read, RAC);
|
||||
if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
|
||||
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
|
||||
continue;
|
||||
}
|
||||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
|
||||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
|
||||
}
|
||||
|
||||
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||
RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||
RecalDataManager.isColorSpaceConsistent(read, offset))
|
||||
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||
RecalUtils.isColorSpaceConsistent(read, offset))
|
||||
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
}
|
||||
countedSites++;
|
||||
|
|
@ -242,9 +250,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
}
|
||||
|
||||
/**
|
||||
* Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
|
||||
* Initialize the reduce step by returning 0L
|
||||
*
|
||||
* @return returns A PrintStream created from the -recalFile filename argument specified to the walker
|
||||
* @return returns 0L
|
||||
*/
|
||||
public Long reduceInit() {
|
||||
return 0L;
|
||||
|
|
@ -271,13 +279,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
public void onTraversalDone(Long result) {
|
||||
logger.info("Calculating quantized quality scores...");
|
||||
quantizeQualityScores();
|
||||
|
||||
logger.info("Writing recalibration report...");
|
||||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
if (!RAC.NO_PLOTS) {
|
||||
logger.info("Generating recalibration plots...");
|
||||
generatePlots();
|
||||
}
|
||||
logger.info("Writing recalibration report...");
|
||||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
logger.info("Processed: " + result + " sites");
|
||||
}
|
||||
|
||||
|
|
@ -285,10 +296,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
|
||||
if (recalFile != null) {
|
||||
RecalibrationReport report = new RecalibrationReport(recalFile);
|
||||
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else
|
||||
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -309,7 +320,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
|
||||
}
|
||||
|
||||
RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,109 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Jan 6, 2010
|
||||
*
|
||||
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
|
||||
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class Datum {
|
||||
|
||||
long numObservations; // number of bases seen in total
|
||||
long numMismatches; // number of bases seen that didn't match the reference
|
||||
|
||||
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Datum() {
|
||||
numObservations = 0L;
|
||||
numMismatches = 0L;
|
||||
}
|
||||
|
||||
public Datum(long numObservations, long numMismatches) {
|
||||
this.numObservations = numObservations;
|
||||
this.numMismatches = numMismatches;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
synchronized void increment(final long incObservations, final long incMismatches) {
|
||||
numObservations += incObservations;
|
||||
numMismatches += incMismatches;
|
||||
}
|
||||
|
||||
synchronized void increment(final boolean isError) {
|
||||
numObservations++;
|
||||
numMismatches += isError ? 1:0;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// methods to derive empirical quality score
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
double empiricalQualDouble() {
|
||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
|
||||
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
byte empiricalQualByte() {
|
||||
final double doubleMismatches = (double) (numMismatches);
|
||||
final double doubleObservations = (double) (numObservations);
|
||||
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof Datum))
|
||||
return false;
|
||||
Datum other = (Datum) o;
|
||||
return numMismatches == other.numMismatches && numObservations == other.numObservations;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*
|
||||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class RecalDatum extends Datum {
|
||||
|
||||
private static final double UNINITIALIZED = -1.0;
|
||||
|
||||
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
|
||||
numObservations = _numObservations;
|
||||
numMismatches = _numMismatches;
|
||||
estimatedQReported = reportedQuality;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
public RecalDatum(final RecalDatum copy) {
|
||||
this.numObservations = copy.numObservations;
|
||||
this.numMismatches = copy.numMismatches;
|
||||
this.estimatedQReported = copy.estimatedQReported;
|
||||
this.empiricalQuality = copy.empiricalQuality;
|
||||
}
|
||||
|
||||
public void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
increment(other.numObservations, other.numMismatches);
|
||||
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void increment(final boolean isError) {
|
||||
super.increment(isError);
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires("empiricalQuality == UNINITIALIZED")
|
||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||
protected final void calcEmpiricalQuality() {
|
||||
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
|
||||
}
|
||||
|
||||
public void setEstimatedQReported(final double estimatedQReported) {
|
||||
this.estimatedQReported = estimatedQReported;
|
||||
}
|
||||
|
||||
public final double getEstimatedQReported() {
|
||||
return estimatedQReported;
|
||||
}
|
||||
|
||||
public void setEmpiricalQuality(final double empiricalQuality) {
|
||||
this.empiricalQuality = empiricalQuality;
|
||||
}
|
||||
|
||||
public final double getEmpiricalQuality() {
|
||||
if (empiricalQuality == UNINITIALIZED)
|
||||
calcEmpiricalQuality();
|
||||
return empiricalQuality;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
|
||||
}
|
||||
|
||||
public String stringForCSV() {
|
||||
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
|
||||
}
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
|
||||
private double qualToErrorProb(final double qual) {
|
||||
return Math.pow(10.0, qual / -10.0);
|
||||
}
|
||||
|
||||
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
||||
final Random random = new Random();
|
||||
final int nObservations = random.nextInt(maxObservations);
|
||||
final int nErrors = random.nextInt(maxErrors);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
|
||||
return new RecalDatum(nObservations, nErrors, (byte)qual);
|
||||
}
|
||||
|
||||
/**
|
||||
* We don't compare the estimated quality reported because it may be different when read from
|
||||
* report tables.
|
||||
*
|
||||
* @param o the other recal datum
|
||||
* @return true if the two recal datums have the same number of observations, errors and empirical quality.
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof RecalDatum))
|
||||
return false;
|
||||
RecalDatum other = (RecalDatum) o;
|
||||
return super.equals(o) &&
|
||||
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
|
|
@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
|
|||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
*/
|
||||
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
|
||||
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
|
||||
/**
|
||||
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
|
||||
|
|
@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
|
|||
* their color space tag can not be recalibrated.
|
||||
*/
|
||||
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
|
||||
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate it's covariate value for base mismatches
|
||||
|
|
@ -174,44 +175,44 @@ public class RecalibrationArgumentCollection {
|
|||
|
||||
public File recalibrationReport = null;
|
||||
|
||||
public GATKReportTable generateReportTable() {
|
||||
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
argumentsTable.addColumn("Argument");
|
||||
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
argumentsTable.addRowID("covariate", true);
|
||||
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
|
||||
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
|
||||
argumentsTable.addRowID("no_standard_covs", true);
|
||||
argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||
argumentsTable.addRowID("run_without_dbsnp", true);
|
||||
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||
argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||
argumentsTable.addRowID("solid_recal_mode", true);
|
||||
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
|
||||
argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
|
||||
argumentsTable.addRowID("solid_nocall_strategy", true);
|
||||
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||
argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||
argumentsTable.addRowID("mismatches_context_size", true);
|
||||
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("indels_context_size", true);
|
||||
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("mismatches_default_quality", true);
|
||||
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("insertions_default_quality", true);
|
||||
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("low_quality_tail", true);
|
||||
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||
argumentsTable.addRowID("default_platform", true);
|
||||
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
|
||||
argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
|
||||
argumentsTable.addRowID("force_platform", true);
|
||||
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||
argumentsTable.addRowID("quantizing_levels", true);
|
||||
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.addRowID("keep_intermediate_files", true);
|
||||
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
|
||||
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
|
||||
argumentsTable.addRowID("no_plots", true);
|
||||
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
||||
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -45,15 +47,17 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
|||
@Output(required = true)
|
||||
private PrintStream out;
|
||||
|
||||
@Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
|
||||
private int coverageThreshold = 20;
|
||||
|
||||
@Override
|
||||
// Look to see if the region has sufficient coverage
|
||||
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
|
||||
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
|
||||
|
||||
// note the linear probability scale
|
||||
int coverageThreshold = 20;
|
||||
return Math.min((double) depth / coverageThreshold, 1);
|
||||
return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -74,9 +78,9 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
|||
public Long reduce(final GenomeLoc value, Long reduce) {
|
||||
if (value != null) {
|
||||
out.println(value.toString());
|
||||
return reduce++;
|
||||
} else
|
||||
return reduce;
|
||||
reduce++;
|
||||
}
|
||||
return reduce;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -264,7 +264,7 @@ class SampleStatistics {
|
|||
return false;
|
||||
|
||||
// different contigs
|
||||
if (read.getMateReferenceIndex() != read.getReferenceIndex())
|
||||
if (!read.getMateReferenceIndex().equals(read.getReferenceIndex()))
|
||||
return false;
|
||||
|
||||
// unmapped
|
||||
|
|
|
|||
|
|
@ -104,7 +104,9 @@ public class BAMDiffableReader implements DiffableReader {
|
|||
InputStream fstream = new BufferedInputStream(new FileInputStream(file));
|
||||
if ( !BlockCompressedInputStream.isValidFile(fstream) )
|
||||
return false;
|
||||
new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
|
||||
final BlockCompressedInputStream BCIS = new BlockCompressedInputStream(fstream);
|
||||
BCIS.read(buffer, 0, BAM_MAGIC.length);
|
||||
BCIS.close();
|
||||
return Arrays.equals(buffer, BAM_MAGIC);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ public class DiffNode extends DiffValue {
|
|||
// X=(A=A B=B C=(D=D))
|
||||
String[] parts = tree.split("=", 2);
|
||||
if ( parts.length != 2 )
|
||||
throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts);
|
||||
throw new ReviewedStingException("Unexpected tree structure: " + tree);
|
||||
String name = parts[0];
|
||||
String value = parts[1];
|
||||
|
||||
|
|
|
|||
|
|
@ -90,8 +90,10 @@ public class GATKReportDiffableReader implements DiffableReader {
|
|||
public boolean canRead(File file) {
|
||||
try {
|
||||
final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX;
|
||||
char[] buff = new char[HEADER.length()];
|
||||
new FileReader(file).read(buff, 0, HEADER.length());
|
||||
final char[] buff = new char[HEADER.length()];
|
||||
final FileReader FR = new FileReader(file);
|
||||
FR.read(buff, 0, HEADER.length());
|
||||
FR.close();
|
||||
String firstLine = new String(buff);
|
||||
return firstLine.startsWith(HEADER);
|
||||
} catch (IOException e) {
|
||||
|
|
|
|||
|
|
@ -47,7 +47,10 @@ import java.util.List;
|
|||
* <p>
|
||||
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
|
||||
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
|
||||
* Note that if there are multiple variants at a site, it takes the first one seen.
|
||||
* Several important notes:
|
||||
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
|
||||
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
|
||||
* 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
|
||||
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
|
|
@ -102,16 +105,16 @@ public class FastaAlternateReference extends FastaReference {
|
|||
String refBase = String.valueOf((char)ref.getBase());
|
||||
|
||||
// Check to see if we have a called snp
|
||||
for ( VariantContext vc : tracker.getValues(variants) ) {
|
||||
for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
|
||||
if ( vc.isFiltered() )
|
||||
continue;
|
||||
|
||||
if ( vc.isSimpleDeletion()) {
|
||||
deletionBasesRemaining = vc.getReference().length();
|
||||
deletionBasesRemaining = vc.getReference().length() - 1;
|
||||
// delete the next n bases, not this one
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||
} else if ( vc.isSimpleInsertion()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
} else if (vc.isSNP()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
public enum Model {
|
||||
/** The default model with the best performance in all cases */
|
||||
EXACT,
|
||||
POOL
|
||||
EXACT
|
||||
}
|
||||
|
||||
protected int N;
|
||||
|
|
|
|||
|
|
@ -148,8 +148,8 @@ public class ConsensusAlleleCounter {
|
|||
boolean foundKey = false;
|
||||
// copy of hashmap into temp arrayList
|
||||
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
|
||||
for (String s : consensusIndelStrings.keySet()) {
|
||||
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
|
||||
for (Map.Entry<String, Integer> s : consensusIndelStrings.entrySet()) {
|
||||
cList.add(new Pair<String, Integer>(s.getKey(), s.getValue()));
|
||||
}
|
||||
|
||||
if (read.getAlignmentEnd() == loc.getStart()) {
|
||||
|
|
@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
|
|||
// get ref bases of accurate deletion
|
||||
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
|
||||
stop = loc.getStart() + dLen;
|
||||
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
|
||||
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
|
||||
|
||||
if (Allele.acceptableAlleleBases(refBases, false)) {
|
||||
refAllele = Allele.create(refBases, true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
altAllele = Allele.create(ref.getBase(), false);
|
||||
}
|
||||
else continue; // don't go on with this allele if refBases are non-standard
|
||||
} else {
|
||||
// insertion case
|
||||
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(s, false);
|
||||
final String insertionBases = (char)ref.getBase() + s; // add reference padding
|
||||
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
|
||||
refAllele = Allele.create(ref.getBase(), true);
|
||||
altAllele = Allele.create(insertionBases, false);
|
||||
stop = loc.getStart();
|
||||
}
|
||||
else continue; // go on to next allele if consensus insertion has any non-standard base.
|
||||
|
|
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
|
|||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(Arrays.asList(refAllele, altAllele));
|
||||
builder.referenceBaseForIndel(ref.getBase());
|
||||
builder.noGenotypes();
|
||||
if (doMultiAllelicCalls) {
|
||||
vcs.add(builder.make());
|
||||
|
|
|
|||
|
|
@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
BOTH,
|
||||
POOLSNP,
|
||||
POOLINDEL,
|
||||
POOLBOTH
|
||||
GeneralPloidySNP,
|
||||
GeneralPloidyINDEL,
|
||||
BOTH
|
||||
}
|
||||
|
||||
public enum GENOTYPING_MODE {
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private boolean DEBUG = false;
|
||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
private boolean allelesArePadded;
|
||||
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
|
||||
|
|
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
haplotypeMap.clear();
|
||||
|
||||
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
alleleList = pair.first;
|
||||
allelesArePadded = pair.second;
|
||||
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
|
||||
if (haplotypeMap == null || haplotypeMap.isEmpty())
|
||||
return null;
|
||||
|
||||
// start making the VariantContext
|
||||
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
|
||||
|
||||
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
|
||||
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
|
||||
final int eventLength = getEventLength(alleleList);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
|
||||
|
||||
// create the genotypes; no-call everyone for now
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
|
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return indelLikelihoodMap.get();
|
||||
}
|
||||
|
||||
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
|
||||
Allele refAllele = alleles.get(0);
|
||||
int endLoc = loc.getStart() + refAllele.length()-1;
|
||||
if (allelesArePadded)
|
||||
endLoc++;
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc loc,
|
||||
|
|
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
}
|
||||
|
||||
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenomeLocParser locParser,
|
||||
final UnifiedArgumentCollection UAC,
|
||||
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
boolean allelesArePadded = true;
|
||||
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||
VariantContext vc = null;
|
||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
|
||||
|
|
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
// ignore places where we don't have a variant
|
||||
if (vc == null)
|
||||
return new Pair<List<Allele>,Boolean>(alleles,false);
|
||||
return alleles;
|
||||
|
||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
||||
|
|
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
} else {
|
||||
alleles.addAll(vc.getAlleles());
|
||||
}
|
||||
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
|
||||
allelesArePadded = false;
|
||||
|
||||
|
||||
|
||||
} else {
|
||||
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
}
|
||||
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
|
||||
return alleles;
|
||||
}
|
||||
|
||||
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
|
||||
|
|
|
|||
|
|
@ -208,7 +208,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
|
||||
}
|
||||
|
||||
public class BAQedPileupElement extends PileupElement {
|
||||
public static class BAQedPileupElement extends PileupElement {
|
||||
public BAQedPileupElement( final PileupElement PE ) {
|
||||
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,11 +26,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
|
||||
public class UnifiedArgumentCollection {
|
||||
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
||||
|
||||
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
|
||||
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
|
|
@ -42,13 +43,6 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
||||
|
||||
/**
|
||||
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
|
||||
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
|
||||
*/
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
|
||||
|
||||
/**
|
||||
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
||||
* distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it
|
||||
|
|
@ -57,26 +51,6 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
||||
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
||||
|
||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
||||
|
||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
||||
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||
|
||||
/**
|
||||
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
|
||||
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
|
||||
* is the default).
|
||||
*/
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
|
||||
/**
|
||||
* This argument allows you to emit low quality calls as filtered records.
|
||||
*/
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
|
||||
/**
|
||||
* Note that calculating the SLOD increases the runtime by an appreciable amount.
|
||||
*/
|
||||
|
|
@ -90,12 +64,6 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
|
||||
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
|
||||
|
||||
/**
|
||||
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
|
||||
*/
|
||||
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
|
||||
public RodBinding<VariantContext> alleles;
|
||||
|
||||
/**
|
||||
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
|
||||
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
|
||||
|
|
@ -107,18 +75,8 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
||||
public Double MAX_DELETION_FRACTION = 0.05;
|
||||
|
||||
/**
|
||||
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
||||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||
* that you not play around with this parameter.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES = 3;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
|
||||
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
|
||||
public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false;
|
||||
|
||||
// indel-related arguments
|
||||
|
|
@ -139,19 +97,18 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
|
||||
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
|
||||
|
||||
|
||||
/**
|
||||
* This argument informs the prior probability of having an indel at a site.
|
||||
*/
|
||||
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
|
||||
public double INDEL_HETEROZYGOSITY = 1.0/8000;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
|
||||
@Advanced
|
||||
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
|
||||
public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
|
||||
@Advanced
|
||||
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
|
||||
public byte INDEL_GAP_OPEN_PENALTY = 45;
|
||||
|
||||
@Hidden
|
||||
|
|
@ -181,7 +138,6 @@ public class UnifiedArgumentCollection {
|
|||
Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
|
||||
sample-level error model
|
||||
*/
|
||||
|
||||
@Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
|
||||
public boolean IGNORE_LANE_INFO = false;
|
||||
|
||||
|
|
@ -263,7 +219,6 @@ public class UnifiedArgumentCollection {
|
|||
uac.referenceSampleName = referenceSampleName;
|
||||
uac.samplePloidy = samplePloidy;
|
||||
uac.maxQualityScore = minQualityScore;
|
||||
uac.maxQualityScore = maxQualityScore;
|
||||
uac.phredScaledPrior = phredScaledPrior;
|
||||
uac.minPower = minPower;
|
||||
uac.minReferenceDepth = minReferenceDepth;
|
||||
|
|
@ -276,5 +231,16 @@ public class UnifiedArgumentCollection {
|
|||
return uac;
|
||||
}
|
||||
|
||||
public UnifiedArgumentCollection() { }
|
||||
|
||||
public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) {
|
||||
super();
|
||||
this.alleles = SCAC.alleles;
|
||||
this.GenotypingMode = SCAC.GenotypingMode;
|
||||
this.heterozygosity = SCAC.heterozygosity;
|
||||
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
|
||||
this.OutputMode = SCAC.OutputMode;
|
||||
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ import java.util.*;
|
|||
* -o snps.raw.vcf \
|
||||
* -stand_call_conf [50.0] \
|
||||
* -stand_emit_conf 10.0 \
|
||||
* -dcov [50] \
|
||||
* -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
|
||||
* [-L targets.interval_list]
|
||||
* </pre>
|
||||
*
|
||||
|
|
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
} else {
|
||||
// in full mode: check for consistency in ploidy/pool calling arguments
|
||||
// check for correct calculation models
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
// polyploidy requires POOL GL and AF calculation models to be specified right now
|
||||
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
|
||||
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
|
||||
|
|
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
|
||||
|
||||
}
|
||||
*/
|
||||
// get all of the unique sample names
|
||||
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
|
||||
samples.clear();
|
||||
|
|
@ -311,8 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
|
||||
// add the pool values for each genotype
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed, for this pool"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed, for this pool"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
}
|
||||
if (UAC.referenceSampleName != null) {
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue