Merge remote-tracking branch 'unstable/master'

This commit is contained in:
Eric Banks 2012-08-20 21:18:54 -04:00
commit 5b1781fdac
272 changed files with 5955 additions and 3968 deletions

View File

@ -76,7 +76,8 @@
<property name="staging.dir" value="staging" /> <property name="staging.dir" value="staging" />
<property name="default.executable" value="none" /> <property name="default.executable" value="none" />
<!-- Javadoc/Scaladoc directories --> <!-- GATKDocs/Javadoc/Scaladoc directories -->
<property name="gatkdocs.dir" value="gatkdocs" />
<property name="javadoc.dir" value="javadoc" /> <property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" /> <property name="scaladoc.dir" value="scaladoc" />
@ -92,12 +93,10 @@
<!-- To disable for test targets, run with -Duse.contracts=false --> <!-- To disable for test targets, run with -Duse.contracts=false -->
<!-- To enable for non-test targets, run with -Duse.contracts=true --> <!-- To enable for non-test targets, run with -Duse.contracts=true -->
<property name="java.contracts.dir" value="${build.dir}/java/contracts" /> <property name="java.contracts.dir" value="${build.dir}/java/contracts" />
<property name="contracts.version" value="1.0-20110609" /> <property name="contracts.version" value="1.0-r139" />
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/> <property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
<property name="contract.dump.dir" value="dump" /> <property name="contract.dump.dir" value="dump" />
<property name="gatkdocs.dir" value="gatkdocs" />
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') --> <!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
<property name="halt" value="yes" /> <property name="halt" value="yes" />
@ -208,19 +207,19 @@
<include name="**/*.java" /> <include name="**/*.java" />
</fileset> </fileset>
<pathconvert property="external.build.dir"> <path id="external.build.dir">
<path path="${java.classes}"/> <path path="${java.classes}" />
</pathconvert> </path>
<pathconvert property="external.dist.dir"> <path id="external.dist.dir">
<path path="${dist.dir}" /> <path path="${dist.dir}" />
</pathconvert> </path>
<!-- GATK dependencies consist of 3rd party plugins plus compiled GATK classes --> <!-- GATK dependencies consist of 3rd party plugins plus compiled GATK classes -->
<pathconvert property="external.gatk.classpath"> <path id="external.gatk.classpath">
<path path="${java.classes}"/> <path path="${java.classes}"/>
<path refid="external.dependencies" /> <path refid="external.dependencies" />
</pathconvert> </path>
<!-- the path for resources that need to go into the GATK jar; <!-- the path for resources that need to go into the GATK jar;
any additional resources should go into this set. --> any additional resources should go into this set. -->
@ -430,9 +429,9 @@
<target name="gatk.compile.external.source" depends="gatk.compile.internal.source" if="include.external"> <target name="gatk.compile.external.source" depends="gatk.compile.internal.source" if="include.external">
<subant target="compile" genericantfile="build.xml"> <subant target="compile" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" /> <property name="build.dir" refid="external.build.dir" />
<property name="dist.dir" value="${external.dist.dir}" /> <property name="dist.dir" refid="external.dist.dir" />
<property name="gatk.classpath" value="${external.gatk.classpath}" /> <property name="gatk.classpath" refid="external.gatk.classpath" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" /> <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant> </subant>
</target> </target>
@ -680,9 +679,9 @@
</jar> </jar>
<subant target="dist" genericantfile="build.xml"> <subant target="dist" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" /> <property name="build.dir" refid="external.build.dir" />
<property name="dist.dir" value="${external.dist.dir}" /> <property name="dist.dir" refid="external.dist.dir" />
<property name="gatk.classpath" value="${external.gatk.classpath}" /> <property name="gatk.classpath" refid="external.gatk.classpath" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" /> <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant> </subant>
</target> </target>

View File

@ -52,7 +52,7 @@
<dependency org="gov.nist" name="Jama" rev="1.0.2"/> <dependency org="gov.nist" name="Jama" rev="1.0.2"/>
<!-- Dependencies for the graph aligner --> <!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/> <dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
<!-- Dependencies for the html walker documention --> <!-- Dependencies for the html walker documention -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/> <dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
@ -87,7 +87,7 @@
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/> <dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
<!-- Contracts for Java and dependencies --> <!-- Contracts for Java and dependencies -->
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-20110609"/> <dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
<dependency org="asm" name="asm-all" rev="3.3.1"/> <dependency org="asm" name="asm-all" rev="3.3.1"/>
<!-- POI, for reading pipeline files --> <!-- POI, for reading pipeline files -->

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {

View File

@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;

View File

@ -1,6 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedHashMap;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -30,24 +35,26 @@ public class ErrorModel {
private static final boolean compressRange = false; private static final boolean compressRange = false;
private static final double log10MinusE = Math.log10(Math.exp(1.0)); private static final double log10MinusE = Math.log10(Math.exp(1.0));
private static final boolean DEBUG = false;
/** /**
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score. * Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
* *
* @param minQualityScore Minimum site quality score to evaluate * @param UAC Argument Collection
* @param maxQualityScore Maximum site quality score to evaluate
* @param phredScaledPrior Prior for site quality
* @param refSamplePileup Reference sample pileup * @param refSamplePileup Reference sample pileup
* @param refSampleVC VC with True alleles in reference sample pileup * @param refSampleVC VC with True alleles in reference sample pileup
* @param minPower Minimum power
*/ */
public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior, public ErrorModel (final UnifiedArgumentCollection UAC,
ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) { final ReadBackedPileup refSamplePileup,
this.maxQualityScore = maxQualityScore; VariantContext refSampleVC, final ReferenceContext refContext) {
this.minQualityScore = minQualityScore; this.maxQualityScore = UAC.maxQualityScore;
this.phredScaledPrior = phredScaledPrior; this.minQualityScore = UAC.minQualityScore;
log10minPower = Math.log10(minPower); this.phredScaledPrior = UAC.phredScaledPrior;
log10minPower = Math.log10(UAC.minPower);
PairHMMIndelErrorModel pairModel = null;
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
double[][] perReadLikelihoods = null;
double[] model = new double[maxQualityScore+1]; double[] model = new double[maxQualityScore+1];
Arrays.fill(model,Double.NEGATIVE_INFINITY); Arrays.fill(model,Double.NEGATIVE_INFINITY);
@ -61,11 +68,17 @@ public class ErrorModel {
break; break;
} }
} }
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
if (refSampleVC.isIndel()) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
} }
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) { }
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore)); double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
for (byte q=minQualityScore; q<=maxQualityScore; q++) { for (byte q=minQualityScore; q<=maxQualityScore; q++) {
// maximum uncertainty if there's no ref data at site // maximum uncertainty if there's no ref data at site
model[q] = p; model[q] = p;
@ -75,22 +88,47 @@ public class ErrorModel {
else { else {
hasData = true; hasData = true;
int matches = 0; int matches = 0;
int coverage = refSamplePileup.getNumberOfElements(); int coverage = 0;
Allele refAllele = refSampleVC.getReference(); Allele refAllele = refSampleVC.getReference();
if (refSampleVC.isIndel()) {
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
if (!haplotypeMap.isEmpty())
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
}
int idx = 0;
for (PileupElement refPileupElement : refSamplePileup) { for (PileupElement refPileupElement : refSamplePileup) {
if (DEBUG)
System.out.println(refPileupElement.toString());
boolean isMatch = false; boolean isMatch = false;
for (Allele allele : refSampleVC.getAlleles()) for (Allele allele : refSampleVC.getAlleles()) {
isMatch |= pileupElementMatches(refPileupElement, allele, refAllele); boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase());
if (DEBUG) System.out.println(m);
isMatch |= m;
}
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
if (!isInformativeElement(perAlleleLikelihoods))
matches++;
else
matches += (isMatch?1:0); matches += (isMatch?1:0);
// System.out.format("MATCH:%b\n",isMatch);
} else {
matches += (isMatch?1:0);
}
coverage++;
} }
int mismatches = coverage - matches; int mismatches = coverage - matches;
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches); //System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
for (byte q=minQualityScore; q<=maxQualityScore; q++) { for (byte q=minQualityScore; q<=maxQualityScore; q++) {
if (coverage==0)
model[q] = p;
else
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches); model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
} }
this.refDepth = coverage; this.refDepth = coverage;
@ -101,6 +139,17 @@ public class ErrorModel {
} }
@Requires("likelihoods.length>0")
private boolean isInformativeElement(double[] likelihoods) {
// if likelihoods are the same, they're not informative
final double thresh = 0.1;
int maxIdx = MathUtils.maxElementIndex(likelihoods);
int minIdx = MathUtils.minElementIndex(likelihoods);
if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh)
return false;
else
return true;
}
/** /**
* Simple constructor that just takes a given log-probability vector as error model. * Simple constructor that just takes a given log-probability vector as error model.
* Only intended for unit testing, not general usage. * Only intended for unit testing, not general usage.
@ -115,23 +164,27 @@ public class ErrorModel {
} }
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) { public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) {
/* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n", if (DEBUG)
System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(), pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString()); pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
*/
//pileupElement.
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch // if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
if (allele.isReference()) { if (allele.isReference()) {
// for a ref allele, any base mismatch or new indel is a mismatch. // for a ref allele, any base mismatch or new indel is a mismatch.
if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case if(allele.getBases().length>0)
return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]); // todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
// could clean up/simplify this when unpadding is removed
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
else else
// either null allele to compare, or ref/alt lengths are different (indel by definition). // either null allele to compare, or ref/alt lengths are different (indel by definition).
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch // if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart()); return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
} }
// for non-ref alleles to compare:
if (refAllele.getBases().length == allele.getBases().length) if (refAllele.getBases().length == allele.getBases().length)
// alleles have the same length (eg snp or mnp) // alleles have the same length (eg snp or mnp)
return pileupElement.getBase() == allele.getBases()[0]; return pileupElement.getBase() == allele.getBases()[0];
@ -209,18 +262,19 @@ public class ErrorModel {
} }
public String toString() { public String toString() {
String result = "("; StringBuilder result = new StringBuilder("(");
boolean skipComma = true; boolean skipComma = true;
for (double v : probabilityVector.getProbabilityVector()) { for (double v : probabilityVector.getProbabilityVector()) {
if (skipComma) { if (skipComma) {
skipComma = false; skipComma = false;
} }
else { else {
result += ","; result.append(",");
} }
result += String.format("%.4f", v); result.append(String.format("%.4f", v));
} }
return result + ")"; result.append(")");
return result.toString();
} }
public static int getTotalReferenceDepth(HashMap<String, ErrorModel> perLaneErrorModels) { public static int getTotalReferenceDepth(HashMap<String, ErrorModel> perLaneErrorModels) {

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC; final protected UnifiedArgumentCollection UAC;
@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false; private final static boolean VERBOSE = false;
protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter); super(UAC, N, logger, verboseWriter);
ploidy = UAC.samplePloidy; ploidy = UAC.samplePloidy;
this.UAC = UAC; this.UAC = UAC;
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
for ( final double[] likelihoods : GLs ) { for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL); final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) { for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0) if (acCount[k] > 0)
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
return newPool; return newPool;
} }
// todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods // todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
/** /**
* *
* @param set ExactACset holding conformation to be computed * @param set ExactACset holding conformation to be computed
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
continue; continue;
PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset); GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
} }
@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K) // Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K) // and L2(K) = Pr(D|AC2=K) * choose(m2,K)
PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1); GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true); final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) { while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector()); x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next(); firstIterator.next();
} }
PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone(); final double[] y = yy.clone();
while(secondIterator.hasNext()) { while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector()); y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize output to -log10(choose(m1+m2,[k1 k2...]) // initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy); final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy); final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K // Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount); double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
// for current conformation, get all possible ways to break vector K into two components G1 and G2 // for current conformation, get all possible ways to break vector K into two components G1 and G2
final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY; set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
while (innerIterator.hasNext()) { while (innerIterator.hasNext()) {
// check if breaking current conformation into g1 and g2 is feasible. // check if breaking current conformation into g1 and g2 is feasible.
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( numOriginalAltAlleles == numNewAltAlleles) { if ( numOriginalAltAlleles == numNewAltAlleles) {
newLikelihoods = originalLikelihoods; newLikelihoods = originalLikelihoods;
} else { } else {
newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse); newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize // might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// find the genotype with maximum likelihoods // find the genotype with maximum likelihoods
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex); final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList<Double> alleleFreqs = new ArrayList<Double>(); final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>(); final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
@ -681,8 +681,8 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
// per-pool logging of AC and AF // per-pool logging of AC and AF
gb.attribute(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
gb.attribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs); gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
// remove PLs if necessary // remove PLs if necessary
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING) if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import java.util.*; import java.util.*;
public abstract class PoolGenotypeLikelihoods { public abstract class GeneralPloidyGenotypeLikelihoods {
protected final int numChromosomes; protected final int numChromosomes;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@ -67,7 +67,7 @@ public abstract class PoolGenotypeLikelihoods {
private static final boolean FAST_GL_COMPUTATION = true; private static final boolean FAST_GL_COMPUTATION = true;
// constructor with given logPL elements // constructor with given logPL elements
public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy, public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) { final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
this.alleles = alleles; this.alleles = alleles;
this.nAlleles = alleles.size(); this.nAlleles = alleles.size();
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD); Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
} else { } else {
if (logLikelihoods.length != likelihoodDim) if (logLikelihoods.length != likelihoodDim)
throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object"); throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed? log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
} }
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
final int numAlleles = currentState.length; final int numAlleles = currentState.length;
final int ploidy = restrictSumTo; final int ploidy = restrictSumTo;
linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy); linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
} }
else else
throw new ReviewedStingException("BUG: Not supported"); throw new ReviewedStingException("BUG: Not supported");
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes, public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
final List<Allele> originalAlleles, final List<Allele> allelesToSubset) { final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes); int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
double[] newPLs = new double[newPLSize]; double[] newPLs = new double[newPLSize];
@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
newCount[idx] = pVec[permutationKey[idx]]; newCount[idx] = pVec[permutationKey[idx]];
// get corresponding index from new count // get corresponding index from new count
int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes); int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
newPLs[outputIdx] = pl; newPLs[outputIdx] = pl;
if (VERBOSE) { if (VERBOSE) {
System.out.println("Old Key:"+Arrays.toString(pVec)); System.out.println("Old Key:"+Arrays.toString(pVec));

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*; import java.util.*;
public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
//protected Set<String> laneIDs; //protected Set<String> laneIDs;
public enum Model { public enum Model {
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
final protected UnifiedArgumentCollection UAC; final protected UnifiedArgumentCollection UAC;
protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC,logger); super(UAC,logger);
this.UAC = UAC; this.UAC = UAC;
@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(), return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
referenceSampleVC.getAlleles()) referenceSampleVC.getAlleles())
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make()) .genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
.make(); .make();
} }
@ -138,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
protected static class PoolGenotypeData { protected static class PoolGenotypeData {
public final String name; public final String name;
public final PoolGenotypeLikelihoods GL; public final GeneralPloidyGenotypeLikelihoods GL;
public final int depth; public final int depth;
public final List<Allele> alleles; public final List<Allele> alleles;
public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) { public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
this.name = name; this.name = name;
this.GL = GL; this.GL = GL;
this.depth = depth; this.depth = depth;
@ -237,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
// create the GenotypeLikelihoods object // create the GenotypeLikelihoods object
final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO); final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
// actually compute likelihoods // actually compute likelihoods
final int nGoodBases = GL.add(pileup, UAC); final int nGoodBases = GL.add(pileup, UAC);
if ( nGoodBases > 0 ) if ( nGoodBases > 0 )
@ -247,7 +246,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
// find the alternate allele(s) that we should be using // find the alternate allele(s) that we should be using
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs); final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
if (alleles == null || alleles.isEmpty())
return null;
// start making the VariantContext // start making the VariantContext
final GenomeLoc loc = ref.getLocus(); final GenomeLoc loc = ref.getLocus();
final int endLoc = getEndLocation(tracker, ref, alleles); final int endLoc = getEndLocation(tracker, ref, alleles);
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
for ( PoolGenotypeData sampleData : GLs ) { for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array // extract from multidimensional array
final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes, final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
allAlleles, alleles); allAlleles, alleles);
// normalize in log space so that max element is zero. // normalize in log space so that max element is zero.
@ -313,7 +313,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
refLanePileup = refPileup.getPileupForLane(laneID); refLanePileup = refPileup.getPileupForLane(laneID);
//ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles); //ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles);
perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower)); perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref));
} }
return perLaneErrorModels; return perLaneErrorModels;
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
Abstract methods - must be implemented in derived classes Abstract methods - must be implemented in derived classes
*/ */
protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles, protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods, final double[] logLikelihoods,
final int ploidy, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final HashMap<String, ErrorModel> perLaneErrorModels,

View File

@ -18,14 +18,16 @@ import java.util.*;
* Time: 10:06 AM * Time: 10:06 AM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
final PairHMMIndelErrorModel pairModel; final PairHMMIndelErrorModel pairModel;
final LinkedHashMap<Allele, Haplotype> haplotypeMap; final LinkedHashMap<Allele, Haplotype> haplotypeMap;
final ReferenceContext refContext; final ReferenceContext refContext;
final int eventLength; final int eventLength;
double[][] readHaplotypeLikelihoods; double[][] readHaplotypeLikelihoods;
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles, final byte refBase;
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods, final double[] logLikelihoods,
final int ploidy, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final HashMap<String, ErrorModel> perLaneErrorModels,
@ -38,6 +40,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
this.haplotypeMap = haplotypeMap; this.haplotypeMap = haplotypeMap;
this.refContext = referenceContext; this.refContext = referenceContext;
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles); this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
// todo - not needed if indel alleles have base at current position
this.refBase = referenceContext.getBase();
} }
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
@ -138,10 +142,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size()); List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
if (!hasReferenceSampleData) { if (!hasReferenceSampleData) {
final int numHaplotypes = haplotypeMap.size();
final int readCounts[] = new int[pileup.getNumberOfElements()]; final int readCounts[] = new int[pileup.getNumberOfElements()];
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
n = readHaplotypeLikelihoods.length; n = readHaplotypeLikelihoods.length;
} else { } else {
Allele refAllele = null; Allele refAllele = null;
@ -161,7 +163,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
int idx =0; int idx =0;
for (Allele allele : alleles) { for (Allele allele : alleles) {
int cnt = numSeenBases.get(idx); int cnt = numSeenBases.get(idx);
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0)); numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0));
} }
n++; n++;

View File

@ -32,17 +32,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*; import java.util.*;
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel { public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4; private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel; private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded = false;
/* /*
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap = private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() { new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
@ -60,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
} }
*/ */
protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) { protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
super(UAC, logger); super(UAC, logger);
@ -70,20 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
} }
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() { protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods, final double[] logLikelihoods,
final int ploidy, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup, final boolean useBQAedPileup,
final ReferenceContext ref, final ReferenceContext ref,
final boolean ignoreLaneInformation){ final boolean ignoreLaneInformation){
return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref); return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
} }
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker, protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
@ -94,12 +85,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
final List<Allele> allAllelesToUse){ final List<Allele> allAllelesToUse){
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
List<Allele> alleles = pair.first;
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE) if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE); alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
allelesArePadded = pair.second;
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear(); IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
haplotypeMap.clear(); haplotypeMap.clear();
@ -131,6 +120,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
protected int getEndLocation(final RefMetaDataTracker tracker, protected int getEndLocation(final RefMetaDataTracker tracker,
final ReferenceContext ref, final ReferenceContext ref,
final List<Allele> allelesToUse) { final List<Allele> allelesToUse) {
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded); return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
} }
} }

View File

@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -22,7 +23,7 @@ import static java.lang.Math.pow;
* and posteriors given a pile of bases and quality scores * and posteriors given a pile of bases and quality scores
* *
*/ */
public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ { public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
final List<Allele> myAlleles; final List<Allele> myAlleles;
final int[] alleleIndices; final int[] alleleIndices;
@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
* @param useBQAedPileup Use BAQed pileup * @param useBQAedPileup Use BAQed pileup
* @param ignoreLaneInformation If true, lane info is ignored * @param ignoreLaneInformation If true, lane info is ignored
*/ */
public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy, public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) { final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation); super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.useBAQedPileup = useBQAedPileup; this.useBAQedPileup = useBQAedPileup;
myAlleles = new ArrayList<Allele>(alleles); myAlleles = new ArrayList<Allele>(alleles);
refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref! Allele refAllele = alleles.get(0);
//sanity check: by construction, first allele should ALWAYS be the reference alleles
if (!refAllele.isReference())
throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
if (myAlleles.size() < BaseUtils.BASES.length) { if (myAlleles.size() < BaseUtils.BASES.length) {
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones, // likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,

View File

@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*; import java.util.*;
public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel { public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) { protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger); super(UAC, logger);
} }
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles, protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods, final double[] logLikelihoods,
final int ploidy, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup, final boolean useBQAedPileup,
final ReferenceContext ref, final ReferenceContext ref,
final boolean ignoreLaneInformation) { final boolean ignoreLaneInformation) {
return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO); return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
} }
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker, protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,

View File

@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -57,8 +56,12 @@ public class GenotypingEngine {
// This function is the streamlined approach, currently not being used // This function is the streamlined approach, currently not being used
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc, public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) { final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser ) {
// Prepare the list of haplotype indices to genotype // Prepare the list of haplotype indices to genotype
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>(); final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
@ -184,8 +187,13 @@ public class GenotypingEngine {
} }
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc, public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) { final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final ArrayList<VariantContext> activeAllelesToGenotype ) {
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>(); final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
@ -220,7 +228,6 @@ public class GenotypingEngine {
} }
} }
// Walk along each position in the key set and create each event to be outputted // Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) { for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
@ -317,7 +324,7 @@ public class GenotypingEngine {
} }
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) { protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
final int MAX_SIZE_TO_COMBINE = 10; final int MAX_SIZE_TO_COMBINE = 15;
final double MERGE_EVENTS_R2_THRESHOLD = 0.95; final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
if( startPosKeySet.size() <= 1 ) { return; } if( startPosKeySet.size() <= 1 ) { return; }
@ -334,10 +341,10 @@ public class GenotypingEngine {
boolean isBiallelic = true; boolean isBiallelic = true;
VariantContext thisVC = null; VariantContext thisVC = null;
VariantContext nextVC = null; VariantContext nextVC = null;
int x11 = 0; double x11 = Double.NEGATIVE_INFINITY;
int x12 = 0; double x12 = Double.NEGATIVE_INFINITY;
int x21 = 0; double x21 = Double.NEGATIVE_INFINITY;
int x22 = 0; double x22 = Double.NEGATIVE_INFINITY;
for( final Haplotype h : haplotypes ) { for( final Haplotype h : haplotypes ) {
// only make complex substitutions out of consecutive biallelic sites // only make complex substitutions out of consecutive biallelic sites
@ -360,21 +367,24 @@ public class GenotypingEngine {
} }
} }
// count up the co-occurrences of the events for the R^2 calculation // count up the co-occurrences of the events for the R^2 calculation
// BUGBUG: use haplotype likelihoods per-sample to make this more accurate final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
haplotypeList.add(h);
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
if( thisHapVC == null ) { if( thisHapVC == null ) {
if( nextHapVC == null ) { x11++; } if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12++; } else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
} else { } else {
if( nextHapVC == null ) { x21++; } if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); }
else { x22++; } else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); }
}
} }
} }
if( thisVC == null || nextVC == null ) { if( thisVC == null || nextVC == null ) {
continue; continue;
//throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart);
} }
if( isBiallelic ) { if( isBiallelic ) {
final double R2 = calculateR2LD( x11, x12, x21, x22 ); final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
if( DEBUG ) { if( DEBUG ) {
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2)); System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
System.out.println("-- " + thisVC); System.out.println("-- " + thisVC);
@ -419,24 +429,21 @@ public class GenotypingEngine {
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) { protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart(); final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart(); final int nextStart = nextVC.getStart();
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); byte[] refBases = new byte[]{};
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); byte[] altBases = new byte[]{};
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases()); refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases()); altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) { int locus;
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()]; final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte); refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte); altBases = ArrayUtils.add(altBases, refByte);
} }
if( nextVC.hasReferenceBaseForIndel() ) { refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
}
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0; int iii = 0;
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
} }
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>(); final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
@ -445,12 +452,11 @@ public class GenotypingEngine {
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make(); return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
} }
@Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"}) protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) {
protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) { final double total = x11 + x12 + x21 + x22;
final int total = x11 + x12 + x21 + x22; final double pa1b1 = x11 / total;
final double pa1b1 = ((double) x11) / ((double) total); final double pa1b2 = x12 / total;
final double pa1b2 = ((double) x12) / ((double) total); final double pa2b1 = x21 / total;
final double pa2b1 = ((double) x21) / ((double) total);
final double pa1 = pa1b1 + pa1b2; final double pa1 = pa1b1 + pa1b2;
final double pb1 = pa1b1 + pa2b1; final double pb1 = pa1b1 + pa2b1;
return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) ); return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );
@ -530,35 +536,37 @@ public class GenotypingEngine {
final int elementLength = ce.getLength(); final int elementLength = ce.getLength();
switch( ce.getOperator() ) { switch( ce.getOperator() ) {
case I: case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ); {
boolean allN = true;
for( final byte b : insertionBases ) {
if( b != (byte) 'N' ) {
allN = false;
break;
}
}
if( !allN ) {
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>(); final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1; final int insertionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) ) {
insertionAlleles.add( Allele.create(refByte, true) );
}
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else { } else {
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) ); byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
if( BaseUtils.isAllRegularBases(insertionBases) ) {
insertionAlleles.add( Allele.create(insertionBases, false) ); insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
} }
}
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} }
alignmentPos += elementLength; alignmentPos += elementLength;
break; break;
}
case S: case S:
{
alignmentPos += elementLength; alignmentPos += elementLength;
break; break;
}
case D: case D:
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength ); {
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>(); final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1; final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen? // BUGBUG: how often does this symbolic deletion allele case happen?
@ -568,13 +576,20 @@ public class GenotypingEngine {
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); // deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make()); // vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
//} else { //} else {
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
deletionAlleles.add( Allele.create(deletionBases, true) ); deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) ); deletionAlleles.add( Allele.create(refByte, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make()); vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
}
//} //}
refPos += elementLength; refPos += elementLength;
break; break;
}
case M: case M:
case EQ:
case X:
{
int numSinceMismatch = -1; int numSinceMismatch = -1;
int stopOfMismatch = -1; int stopOfMismatch = -1;
int startOfMismatch = -1; int startOfMismatch = -1;
@ -597,11 +612,13 @@ public class GenotypingEngine {
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) { if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 ); final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 ); final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>(); final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refBases, true ) ); snpAlleles.add( Allele.create( refBases, true ) );
snpAlleles.add( Allele.create( mismatchBases, false ) ); snpAlleles.add( Allele.create( mismatchBases, false ) );
final int snpStart = refLoc.getStart() + refPosStartOfMismatch; final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make()); vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
}
numSinceMismatch = -1; numSinceMismatch = -1;
stopOfMismatch = -1; stopOfMismatch = -1;
startOfMismatch = -1; startOfMismatch = -1;
@ -611,6 +628,7 @@ public class GenotypingEngine {
alignmentPos++; alignmentPos++;
} }
break; break;
}
case N: case N:
case H: case H:
case P: case P:

View File

@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -56,6 +58,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection; import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -103,7 +106,7 @@ import java.util.*;
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.LOCUS) @PartitionBy(PartitionType.LOCUS)
@ActiveRegionExtension(extension=65, maxRegion=275) @ActiveRegionExtension(extension=65, maxRegion=300)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible { public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/** /**
@ -126,9 +129,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1; protected int MIN_PRUNE_FACTOR = 1;
@Advanced
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false) @Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false; protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
@Advanced
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false) @Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false; protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
@ -139,9 +144,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000; protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
@Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false)
protected boolean USE_EXPANDED_TRIGGER_SET = false;
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false) @Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
protected boolean USE_ALLELES_TRIGGER = false; protected boolean USE_ALLELES_TRIGGER = false;
@ -188,7 +190,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
protected String[] annotationClassesToUse = { "Standard" }; protected String[] annotationClassesToUse = { "Standard" };
@ArgumentCollection @ArgumentCollection
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
// the calculation arguments // the calculation arguments
private UnifiedGenotyperEngine UG_engine = null; private UnifiedGenotyperEngine UG_engine = null;
@ -239,12 +241,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samplesList.addAll( samples ); samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model // initialize the UnifiedGenotyper Engine which is used to call into the exact model
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_CALLING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING) ); // low values used for isActive determination only, default/user-specified values used for actual calling UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING);
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING) ); // low values used for isActive determination only, default/user-specified values used for actual calling UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
// initialize the output VCF header // initialize the output VCF header
@ -308,8 +310,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
public boolean wantsNonPrimaryReads() { return true; } public boolean wantsNonPrimaryReads() { return true; }
@Override @Override
@Ensures({"result >= 0.0", "result <= 1.0"}) @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) { for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
@ -318,56 +320,52 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
} }
} }
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
return 1.0; return new ActivityProfileResult(1.0);
} }
} }
if( USE_ALLELES_TRIGGER ) { if( USE_ALLELES_TRIGGER ) {
return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
} }
if( context == null ) { return 0.0; } if( context == null ) { return new ActivityProfileResult(0.0); }
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
noCall.add(Allele.NO_CALL); noCall.add(Allele.NO_CALL);
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
for( final String sample : splitContexts.keySet() ) { final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
for( final Map.Entry<String, AlignmentContext> sample : splitContexts.entrySet() ) {
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event) final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
Arrays.fill(genotypeLikelihoods, 0.0); Arrays.fill(genotypeLikelihoods, 0.0);
for( final PileupElement p : splitContexts.get(sample).getBasePileup() ) { for( final PileupElement p : sample.getValue().getBasePileup() ) {
final byte qual = ( USE_EXPANDED_TRIGGER_SET ? final byte qual = p.getQual();
( p.isNextToSoftClip() || p.isBeforeInsertion() || p.isAfterInsertion() ? ( p.getQual() > QualityUtils.MIN_USABLE_Q_SCORE ? p.getQual() : (byte) 20 ) : p.getQual() ) if( p.isDeletion() || qual > (byte) 18) {
: p.getQual() );
if( p.isDeletion() || qual > (USE_EXPANDED_TRIGGER_SET ? QualityUtils.MIN_USABLE_Q_SCORE : (byte) 18) ) {
int AA = 0; final int AB = 1; int BB = 2; int AA = 0; final int AB = 1; int BB = 2;
if( USE_EXPANDED_TRIGGER_SET ) {
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ||
(!p.getRead().getNGSPlatform().equals(NGSPlatform.SOLID) && ((p.getRead().getReadPairedFlag() && p.getRead().getMateUnmappedFlag()) || BadMateFilter.hasBadMate(p.getRead()))) ) {
AA = 2;
BB = 0;
}
} else {
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) { if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2; AA = 2;
BB = 0; BB = 0;
if( p.isNextToSoftClip() ) {
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
} }
} }
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual); genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
genotypeLikelihoods[AB] += MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
genotypeLikelihoods[BB] += QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
} }
} }
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() ); genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
} }
final ArrayList<Allele> alleles = new ArrayList<Allele>(); final ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE ); alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) ); final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
} }
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
@ -416,15 +414,17 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult : for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() ) ? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() )
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) { : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); } if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult ); final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst()); final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
// add some custom annotations to the calls
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes()); final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
if( !GENOTYPE_FULL_ACTIVE_REGION ) {
// add some custom annotations to the calls
// Calculate the number of variants on the haplotype // Calculate the number of variants on the haplotype
int maxNumVar = 0; int maxNumVar = 0;
for( final Allele allele : callResult.getFirst().getAlleles() ) { for( final Allele allele : callResult.getFirst().getAlleles() ) {
@ -456,6 +456,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
if( annotatedCall.hasAttribute("QD") ) { if( annotatedCall.hasAttribute("QD") ) {
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) ); myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
} }
}
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() ); vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
} }
@ -493,7 +494,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); } if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>(); final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) ); final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
activeRegion.clearReads(); activeRegion.clearReads();
@ -522,7 +523,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(); final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) { for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
readsToRemove.add(rec); readsToRemove.add(rec);
} }
} }

View File

@ -82,11 +82,11 @@ public class KBestPaths {
} }
} }
protected static class PathComparatorLowestEdge implements Comparator<Path> { //protected static class PathComparatorLowestEdge implements Comparator<Path> {
public int compare(final Path path1, final Path path2) { // public int compare(final Path path1, final Path path2) {
return path2.lowestEdge - path1.lowestEdge; // return path2.lowestEdge - path1.lowestEdge;
} // }
} //}
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) { public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); } if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@ -99,7 +99,7 @@ public class KBestPaths {
} }
} }
Collections.sort(bestPaths, new PathComparatorLowestEdge() ); Collections.sort(bestPaths, new PathComparatorTotalScore() );
Collections.reverse(bestPaths); Collections.reverse(bestPaths);
return bestPaths.subList(0, Math.min(k, bestPaths.size())); return bestPaths.subList(0, Math.min(k, bestPaths.size()));
} }
@ -114,8 +114,8 @@ public class KBestPaths {
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) { if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) { if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths // clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorLowestEdge() ); Collections.sort(bestPaths, new PathComparatorTotalScore() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
} }
bestPaths.add(path); bestPaths.add(path);
} else if( n.val > 10000) { } else if( n.val > 10000) {

View File

@ -30,6 +30,7 @@ import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -50,18 +51,17 @@ public class LikelihoodCalculationEngine {
} }
public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) { public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
final int numHaplotypes = haplotypes.size();
int X_METRIC_LENGTH = 0; int X_METRIC_LENGTH = 0;
for( final String sample : perSampleReadList.keySet() ) { for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
for( final GATKSAMRecord read : perSampleReadList.get(sample) ) { for( final GATKSAMRecord read : sample.getValue() ) {
final int readLength = read.getReadLength(); final int readLength = read.getReadLength();
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; } if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
} }
} }
int Y_METRIC_LENGTH = 0; int Y_METRIC_LENGTH = 0;
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { for( final Haplotype h : haplotypes ) {
final int haplotypeLength = haplotypes.get(jjj).getBases().length; final int haplotypeLength = h.getBases().length;
if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; } if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; }
} }
@ -90,8 +90,10 @@ public class LikelihoodCalculationEngine {
final int numHaplotypes = haplotypes.size(); final int numHaplotypes = haplotypes.size();
final int numReads = reads.size(); final int numReads = reads.size();
final double[][] readLikelihoods = new double[numHaplotypes][numReads]; final double[][] readLikelihoods = new double[numHaplotypes][numReads];
final int[][] readCounts = new int[numHaplotypes][numReads];
for( int iii = 0; iii < numReads; iii++ ) { for( int iii = 0; iii < numReads; iii++ ) {
final GATKSAMRecord read = reads.get(iii); final GATKSAMRecord read = reads.get(iii);
final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
final byte[] overallGCP = new byte[read.getReadLength()]; final byte[] overallGCP = new byte[read.getReadLength()];
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
@ -103,7 +105,7 @@ public class LikelihoodCalculationEngine {
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
} }
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
@ -114,10 +116,11 @@ public class LikelihoodCalculationEngine {
readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(), readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
readQuals, readInsQuals, readDelQuals, overallGCP, readQuals, readInsQuals, readDelQuals, overallGCP,
haplotypeStart, matchMetricArray, XMetricArray, YMetricArray); haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
readCounts[jjj][iii] = readCount;
} }
} }
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj] ); haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] );
} }
} }
@ -143,9 +146,19 @@ public class LikelihoodCalculationEngine {
return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping ); return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
} }
// This function takes just a single sample and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"}) @Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"}) @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) { public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
final TreeSet<String> sampleSet = new TreeSet<String>();
sampleSet.add(sample);
return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
}
// This function takes a set of samples to pool over and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
final int numHaplotypes = haplotypeMapping.size(); final int numHaplotypes = haplotypeMapping.size();
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes]; final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
@ -154,46 +167,7 @@ public class LikelihoodCalculationEngine {
} }
// compute the diploid haplotype likelihoods // compute the diploid haplotype likelihoods
for( int iii = 0; iii < numHaplotypes; iii++ ) { // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
double haplotypeLikelihood = 0.0;
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
}
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
}
}
}
}
// normalize the diploid likelihoods matrix
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
}
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final Set<String> samples ) {
// set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
final int numHaplotypes = haplotypeMapping.size();
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
for( int iii = 0; iii < numHaplotypes; iii++ ) {
Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
}
// compute the diploid haplotype likelihoods
for( int iii = 0; iii < numHaplotypes; iii++ ) { for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) { for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) { for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
@ -201,11 +175,12 @@ public class LikelihoodCalculationEngine {
double haplotypeLikelihood = 0.0; double haplotypeLikelihood = 0.0;
for( final String sample : samples ) { for( final String sample : samples ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample); final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
final int[] readCounts_iii = iii_mapped.getReadCounts(sample);
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample); final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) { for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup. // First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF; haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
} }
} }
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum? haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
@ -306,12 +281,19 @@ public class LikelihoodCalculationEngine {
final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>(); final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add(0); // always start with the reference haplotype bestHaplotypesIndexList.add(0); // always start with the reference haplotype
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( haplotypes, sampleKeySet ); // all samples pooled together // set up the default 1-to-1 haplotype mapping object
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together
int hap1 = 0; int hap1 = 0;
int hap2 = 0; int hap2 = 0;
//double bestElement = Double.NEGATIVE_INFINITY; //double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 ); final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY; double maxElement = Double.NEGATIVE_INFINITY;
for( int iii = 0; iii < numHaplotypes; iii++ ) { for( int iii = 0; iii < numHaplotypes; iii++ ) {
@ -343,9 +325,9 @@ public class LikelihoodCalculationEngine {
public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) { public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>(); final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst()); final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
for( final String sample : perSampleReadList.keySet() ) { for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>(); final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
final ArrayList<GATKSAMRecord> readsForThisSample = perSampleReadList.get(sample); final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) { for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same! final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
@ -355,7 +337,7 @@ public class LikelihoodCalculationEngine {
for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
double maxLikelihood = Double.NEGATIVE_INFINITY; double maxLikelihood = Double.NEGATIVE_INFINITY;
for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object) for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
final double likelihood = h.getReadLikelihoods(sample)[iii]; final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
if( likelihood > maxLikelihood ) { if( likelihood > maxLikelihood ) {
maxLikelihood = likelihood; maxLikelihood = likelihood;
} }
@ -390,13 +372,13 @@ public class LikelihoodCalculationEngine {
readList = new ArrayList<GATKSAMRecord>(); readList = new ArrayList<GATKSAMRecord>();
alleleReadMap.put(Allele.NO_CALL, readList); alleleReadMap.put(Allele.NO_CALL, readList);
} }
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample) ) { for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) { if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
readList.add(read); readList.add(read);
} }
} }
returnMap.put(sample, alleleReadMap); returnMap.put(sample.getKey(), alleleReadMap);
} }
return returnMap; return returnMap;
} }

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SWPairwiseAlignment; import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -68,7 +69,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
} }
private void createDeBruijnGraphs( final ArrayList<GATKSAMRecord> reads, final Haplotype refHaplotype ) { protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
graphs.clear(); graphs.clear();
// create the graph // create the graph
@ -161,7 +162,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
} }
} }
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final ArrayList<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) { private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Collection<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
final byte[] refSequence = refHaplotype.getBases(); final byte[] refSequence = refHaplotype.getBases();
if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) { if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
final int kmersInSequence = refSequence.length - KMER_LENGTH + 1; final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
@ -183,6 +184,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final GATKSAMRecord read : reads ) { for( final GATKSAMRecord read : reads ) {
final byte[] sequence = read.getReadBases(); final byte[] sequence = read.getReadBases();
final byte[] qualities = read.getBaseQualities(); final byte[] qualities = read.getBaseQualities();
final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not readuced
if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) { if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) {
final int kmersInSequence = sequence.length - KMER_LENGTH + 1; final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
for( int iii = 0; iii < kmersInSequence - 1; iii++ ) { for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
@ -194,6 +196,14 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
break; break;
} }
} }
int countNumber = 1;
if (read.isReducedRead()) {
// compute mean number of reduced read counts in current kmer span
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
// precise rounding can make a difference with low consensus counts
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
}
if( !badKmer ) { if( !badKmer ) {
// get the kmers // get the kmers
final byte[] kmer1 = new byte[KMER_LENGTH]; final byte[] kmer1 = new byte[KMER_LENGTH];
@ -201,6 +211,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final byte[] kmer2 = new byte[KMER_LENGTH]; final byte[] kmer2 = new byte[KMER_LENGTH];
System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH); System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);
for (int k=0; k < countNumber; k++)
addKmersToGraph(graph, kmer1, kmer2, false); addKmersToGraph(graph, kmer1, kmer2, false);
} }
} }
@ -230,7 +241,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return true; return true;
} }
private void printGraphs() { protected void printGraphs() {
int count = 0; int count = 0;
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) { for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
GRAPH_WRITER.println("digraph kmer" + count++ +" {"); GRAPH_WRITER.println("digraph kmer" + count++ +" {");
@ -281,7 +292,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() ); final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) { if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) { if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
@ -311,7 +322,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
} }
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) { private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart; if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() ); haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) ); haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );

View File

@ -50,20 +50,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000"; String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{ return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")}, {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")}, {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
}; };
} }
@ -74,10 +75,11 @@ public class BQSRIntegrationTest extends WalkerTest {
Arrays.asList(params.md5)); Arrays.asList(params.md5));
executeTest("testBQSR-"+params.args, spec).getFirst(); executeTest("testBQSR-"+params.args, spec).getFirst();
WalkerTestSpec specNT2 = new WalkerTestSpec( // TODO -- re-enable once parallelization is fixed in BaseRecalibrator
params.getCommandLine() + " -nt 2", //WalkerTestSpec specNT2 = new WalkerTestSpec(
Arrays.asList(params.md5)); // params.getCommandLine() + " -nt 2",
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst(); // Arrays.asList(params.md5));
//executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
} }
@Test @Test
@ -94,6 +96,20 @@ public class BQSRIntegrationTest extends WalkerTest {
executeTest("testBQSRFailWithoutDBSNP", spec); executeTest("testBQSRFailWithoutDBSNP", spec);
} }
@Test
public void testBQSRFailWithSolidNoCall() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
" -T BaseRecalibrator" +
" -R " + b36KGReference +
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
" -L 1:50,000-80,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.class);
executeTest("testBQSRFailWithSolidNoCall", spec);
}
private static class PRTest { private static class PRTest {
final String args; final String args;
final String md5; final String md5;

View File

@ -19,7 +19,7 @@ import java.util.Arrays;
* Time: 7:44 AM * Time: 7:44 AM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class PoolAFCalculationModelUnitTest extends BaseTest { public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1; static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2; static double[] AA2, AB2, AC2, BB2, BC2, CC2;
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
public void testGLs(GetGLsTest cfg) { public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles); final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size()); final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors double[] priors = new double[len]; // flat priors
PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result); GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
int nameIndex = 1; int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));

View File

@ -41,7 +41,7 @@ import java.io.PrintStream;
import java.util.*; import java.util.*;
public class PoolGenotypeLikelihoodsUnitTest { public class GeneralPloidyGenotypeLikelihoodsUnitTest {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class); final Logger logger = Logger.getLogger(Walker.class);
@ -49,11 +49,18 @@ public class PoolGenotypeLikelihoodsUnitTest {
private static final boolean SIMULATE_NOISY_PILEUP = false; private static final boolean SIMULATE_NOISY_PILEUP = false;
private static final int NUM_SIMULATED_OBS = 10; private static final int NUM_SIMULATED_OBS = 10;
void PoolGenotypeLikelihoodsUnitTest() {
UAC.minQualityScore = 5;
UAC.maxQualityScore = 40;
UAC.phredScaledPrior = (byte)20;
UAC.minPower = 0.0;
}
@Test @Test
public void testStoringLikelihoodElements() { public void testStoringLikelihoodElements() {
// basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back // basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
int ploidy = 20; int ploidy = 20;
int numAlleles = 4; int numAlleles = 4;
@ -71,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int k=0; k < gls.length; k++) for (int k=0; k < gls.length; k++)
gls[k]= (double)k; gls[k]= (double)k;
PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true); GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
double[] glnew = gl.getLikelihoods(); double[] glnew = gl.getLikelihoods();
Assert.assertEquals(gls, glnew); Assert.assertEquals(gls, glnew);
@ -83,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int ploidy = 2; ploidy < 10; ploidy++) { for (int ploidy = 2; ploidy < 10; ploidy++) {
for (int nAlleles = 2; nAlleles < 10; nAlleles++) for (int nAlleles = 2; nAlleles < 10; nAlleles++)
Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy), Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy)); GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
} }
@ -95,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create iterator, compare linear index given by iterator with closed form function // create iterator, compare linear index given by iterator with closed form function
int numAlleles = 4; int numAlleles = 4;
int ploidy = 2; int ploidy = 2;
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy); GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
while(iterator.hasNext()) { while(iterator.hasNext()) {
System.out.format("\n%d:",iterator.getLinearIndex()); System.out.format("\n%d:",iterator.getLinearIndex());
@ -104,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
System.out.format("%d ",aa); System.out.format("%d ",aa);
int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy); int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
System.out.format("Computed idx = %d\n",computedIdx); System.out.format("Computed idx = %d\n",computedIdx);
iterator.next(); iterator.next();
} }
@ -133,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
allelesToSubset.add(Allele.create("A",false)); allelesToSubset.add(Allele.create("A",false));
allelesToSubset.add(Allele.create("C",false)); allelesToSubset.add(Allele.create("C",false));
double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy, double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
originalAlleles, allelesToSubset); originalAlleles, allelesToSubset);
@ -163,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test @Test
public void testIndexIterator() { public void testIndexIterator() {
int[] seed = new int[]{1,2,3,4}; int[] seed = new int[]{1,2,3,4};
PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1); GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1); Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
@ -221,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
} }
private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) { private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo); GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
while(iterator.hasNext()) { while(iterator.hasNext()) {
int[] a = iterator.getCurrentVector(); int[] a = iterator.getCurrentVector();
int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo); int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
if (VERBOSE) { if (VERBOSE) {
System.out.format("%d:",iterator.getLinearIndex()); System.out.format("%d:",iterator.getLinearIndex());
for (int i=0; i < seed.length; i++) for (int i=0; i < seed.length; i++)
@ -251,8 +258,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test @Test
public void testErrorModel() { public void testErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte minQ = 5;
final byte maxQ = 40;
final byte refByte = refPileupTestProvider.getRefByte(); final byte refByte = refPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final String refSampleName = refPileupTestProvider.getSampleNames().get(0); final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
@ -270,7 +275,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise // get artificial alignment context for ref sample - no noise
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30); Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0); final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -287,19 +292,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test @Test
public void testIndelErrorModel() { public void testIndelErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte minQ = 5;
final byte maxQ = 40;
final byte refByte = refPileupTestProvider.getRefByte(); final byte refByte = refPileupTestProvider.getRefByte();
final String altBases = "TCA"; final String altBases = "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0); final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List<Allele> trueAlleles = new ArrayList<Allele>(); final List<Allele> trueAlleles = new ArrayList<Allele>();
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true)); trueAlleles.add(Allele.create(refByte, true));
trueAlleles.add(Allele.create("TC", false)); trueAlleles.add(Allele.create((char)refByte + "TC", false));
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases()); final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles). refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make(); genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
final int[] matchArray = {95, 995, 9995, 10000}; final int[] matchArray = {95, 995, 9995, 10000};
@ -311,9 +314,9 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise // get artificial alignment context for ref sample - no noise
// CASE 1: Test HET insertion // CASE 1: Test HET insertion
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches // Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30); Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0); final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -329,12 +332,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create deletion VC // create deletion VC
final int delLength = 4; final int delLength = 4;
final List<Allele> delAlleles = new ArrayList<Allele>(); final List<Allele> delAlleles = new ArrayList<Allele>();
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true)); delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false)); delAlleles.add(Allele.create(refByte, false));
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles). refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make(); genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
for (int matches: matchArray) { for (int matches: matchArray) {
for (int mismatches: mismatchArray) { for (int mismatches: mismatchArray) {
@ -343,7 +346,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches // Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30); Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0); final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -388,8 +391,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte refByte = readPileupTestProvider.getRefByte(); final byte refByte = readPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
final Set<String> laneIDs = new TreeSet<String>(); final Set<String> laneIDs = new TreeSet<String>();
@ -407,17 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (String laneID : laneIDs) for (String laneID : laneIDs)
noisyErrorModels.put(laneID, Q30ErrorModel); noisyErrorModels.put(laneID, Q30ErrorModel);
// all first ref allele
allAlleles.add(Allele.create(refByte,true));
for (byte b: BaseUtils.BASES) { for (byte b: BaseUtils.BASES) {
if (refByte == b) if (refByte != b)
allAlleles.add(Allele.create(b,true));
else
allAlleles.add(Allele.create(b, false)); allAlleles.add(Allele.create(b, false));
} }
final int refIdx = 0;
int altIdx = -1;
for (int k=0; k < allAlleles.size(); k++)
if (altByte == allAlleles.get(k).getBases()[0]) {
altIdx = k;
break;
}
PrintStream out = null; PrintStream out = null;
if (SIMULATE_NOISY_PILEUP) { if (SIMULATE_NOISY_PILEUP) {
try { try {
out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table")); out = new PrintStream(new File("GLUnitTest.table"));
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table")); // out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
} }
catch (Exception e) {} catch (Exception e) {}
@ -441,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this // get now likelihoods for this
final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true); final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE); final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
if (VERBOSE) { if (VERBOSE) {
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool); System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
@ -470,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this // get now likelihoods for this
final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true); final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE); noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
mlPair = noisyGL.getMostLikelyACCount(); mlPair = noisyGL.getMostLikelyACCount();

View File

@ -12,50 +12,66 @@ import org.testng.annotations.Test;
* Time: 11:28 AM * Time: 11:28 AM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class PoolCallerIntegrationTest extends WalkerTest { public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
final static String REF = b37KGReference; final static String REF = b37KGReference;
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list"; final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam"; final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf"; final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
final String REFSAMPLE_NAME = "NA12878"; final String REFSAMPLE_NAME = "NA12878";
final String MTINTERVALS = "MT"; final String MTINTERVALS = "MT:1-3000";
final String LSVINTERVALS = "20:40,000,000-41,000,000"; final String LSVINTERVALS = "20:40,000,000-41,000,000";
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf"; final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf"; final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf"; final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
private void PC_MT_Test(String bam, String args, String name, String md5) { private void PC_MT_Test(String bam, String args, String name, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL", final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s"; REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec); executeTest("testPoolCaller:"+name+" args=" + args, spec);
} }
private void PC_LSV_Test(String args, String name, String model, String md5) { private void PC_LSV_Test(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL", final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s"; REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec); executeTest("testPoolCaller:"+name+" args=" + args, spec);
} }
@Test private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() { public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4"); PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93");
} }
@Test @Test(enabled = true)
public void testINDEL_GGA_Pools() { public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478"); PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b");
} }
@Test @Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() { public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5"); PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459");
} }
@Test @Test(enabled = true)
public void testMT_SNP_GGA_sp10() { public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
} }
} }

View File

@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + ref + SNP = MNP with ref base gap // SNP + ref + SNP = MNP with ref base gap
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + SNP // insertion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + insertion // SNP + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + SNP // deletion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + deletion // SNP + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion = MNP // insertion + deletion = MNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion // insertion + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + insertion // insertion + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + deletion // deletion + deletion
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make(); thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // deletion + insertion (abutting)
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// complex + complex // complex + complex
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
} }
/** /**

View File

@ -20,17 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSample() { public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "882ece8391cfec30ab658970a487d078"); HCTest(CEUTRIO_BAM, "", "6b30c7e1b6bbe80d180d9d67441cec12");
} }
@Test @Test
public void testHaplotypeCallerSingleSample() { public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "6f458e04251bc4c09b5b544d46f19d68"); HCTest(NA12878_BAM, "", "4cdfbfeadef00725974828310558d7d4");
} }
@Test @Test
public void testHaplotypeCallerMultiSampleGGA() { public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "c172791007f867bf3b975d4194564d9e"); HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "6183fb6e374976d7087150009685e043");
} }
private void HCTestComplexVariants(String bam, String args, String md5) { private void HCTestComplexVariants(String bam, String args, String md5) {
@ -41,8 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSampleComplex() { public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "94186811016b332a58c150df556278f8"); HCTestComplexVariants(CEUTRIO_BAM, "", "ab7593a7a60a2e9a66053572f1718df1");
} }
} }

View File

@ -95,9 +95,10 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>(); ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
for( int iii = 1; iii <= 3; iii++) { for( int iii = 1; iii <= 3; iii++) {
Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) ); Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) );
int readCount = 1;
if( readLikelihood != null ) { if( readLikelihood != null ) {
Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() ); Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() );
haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}); haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}, new int[]{readCount});
haplotypes.add(haplotype); haplotypes.add(haplotype);
} }
} }

View File

@ -7,6 +7,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
*/ */
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -18,6 +20,7 @@ import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*; import java.util.*;
public class SimpleDeBruijnAssemblerUnitTest extends BaseTest { public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
@ -143,6 +146,44 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
Assert.assertTrue(graphEquals(graph, expectedGraph)); Assert.assertTrue(graphEquals(graph, expectedGraph));
} }
@Test(enabled=false)
// not ready yet
public void testBasicGraphCreation() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte refBase = refPileupTestProvider.getReferenceContext().getBase();
final String altBase = (refBase==(byte)'A'?"C":"A");
final int matches = 50;
final int mismatches = 50;
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30);
PrintStream graphWriter = null;
try{
graphWriter = new PrintStream("du.txt");
} catch (Exception e) {}
SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter);
final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases());
refHaplotype.setIsReference(true);
assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype);
/* // clean up the graphs by pruning and merging
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR );
//eliminateNonRefPaths( graph );
SimpleDeBruijnAssembler.mergeNodes( graph );
}
*/
if( graphWriter != null ) {
assembler.printGraphs();
}
int k=2;
// find the best paths in the graphs
// return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
}
@Test(enabled = true) @Test(enabled = true)
public void testEliminateNonRefPaths() { public void testEliminateNonRefPaths() {
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class); DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);

View File

@ -1,8 +1,18 @@
library("ggplot2") library("ggplot2")
library(gplots)
library("reshape")
library("grid")
library("tools") #For compactPDF in R 2.13+ library("tools") #For compactPDF in R 2.13+
library(gsalib)
args <- commandArgs(TRUE)
if ( interactive() ) {
args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA)
} else {
args <- commandArgs(TRUE)
}
data <- read.csv(args[1]) data <- read.csv(args[1])
gsa.report <- gsa.read.gatkreport(args[2])
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType)))) data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
numRG = length(unique(data$ReadGroup)) numRG = length(unique(data$ReadGroup))
@ -82,20 +92,45 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
p <- ggplot(d, aes(x=CovariateValue)) + p <- ggplot(d, aes(x=CovariateValue)) +
xlab(paste(cov,"Covariate")) + xlab(paste(cov,"Covariate")) +
ylab("Number of Observations") + ylab("No. of Observations (area normalized)") +
blankTheme blankTheme
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity")
scale_y_continuous(formatter="comma") d <- d + scale_fill_manual(values=c("maroon1","blue"))
d <- d + facet_grid(.~EventType)
# d <- d + scale_y_continuous(formatter="comma")
} }
} }
pdf(args[2],height=9,width=15) if ( ! is.na(args[3]) )
pdf(args[3],height=9,width=15)
#frame()
textplot(gsa.report$Arguments, show.rownames=F)
title(
main="GATK BaseRecalibration report",
sub=date())
distributeGraphRows(list(a,b,c), c(1,1,1)) distributeGraphRows(list(a,b,c), c(1,1,1))
distributeGraphRows(list(d,e,f), c(1,1,1)) distributeGraphRows(list(d,e,f), c(1,1,1))
dev.off()
# format the overall information
rt0 <- data.frame(
ReadGroup = gsa.report$RecalTable0$ReadGroup,
EventType = gsa.report$RecalTable0$EventType,
EmpiricalQuality = sprintf("%.1f", gsa.report$RecalTable0$EmpiricalQuality),
EstimatedQReported = sprintf("%.1f", gsa.report$RecalTable0$EstimatedQReported),
Observations = sprintf("%.2e", gsa.report$RecalTable0$Observations),
Errors = sprintf("%.2e", gsa.report$RecalTable0$Errors))
textplot(t(rt0), show.colnames=F)
title("Overall error rates by event type")
if (exists('compactPDF')) { # plot per quality score recalibration table
textplot(gsa.report$RecalTable1, show.rownames=F)
title("Error rates by event type and initial quality score")
if ( ! is.na(args[3]) ) {
dev.off()
if (exists('compactPDF')) {
compactPDF(args[2]) compactPDF(args[2])
}
} }

View File

@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
if ( requestedStrat == "Sample" ) { if ( requestedStrat == "Sample" ) {
perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "") perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)")
} else { # by AlleleCount } else { # by AlleleCount
perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs))) perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount") perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")

View File

@ -208,6 +208,7 @@ public class FastaSequenceIndexBuilder {
break; break;
} }
} }
in.close();
return sequenceIndex; return sequenceIndex;
} }
catch (IOException e) { catch (IOException e) {

View File

@ -12,10 +12,10 @@ import java.util.*;
*/ */
public class Bases implements Iterable<Byte> public class Bases implements Iterable<Byte>
{ {
public static byte A = 'A'; public static final byte A = 'A';
public static byte C = 'C'; public static final byte C = 'C';
public static byte G = 'G'; public static final byte G = 'G';
public static byte T = 'T'; public static final byte T = 'T';
public static final Bases instance = new Bases(); public static final Bases instance = new Bases();

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.lang.annotation.Annotation; import java.lang.annotation.Annotation;
import java.util.List; import java.util.List;
@ -147,6 +148,9 @@ public class ArgumentDefinition {
this.exclusiveOf = exclusiveOf; this.exclusiveOf = exclusiveOf;
this.validation = validation; this.validation = validation;
this.validOptions = validOptions; this.validOptions = validOptions;
validateName(shortName);
validateName(fullName);
} }
/** /**
@ -192,6 +196,9 @@ public class ArgumentDefinition {
else else
shortName = null; shortName = null;
validateName(shortName);
validateName(fullName);
this.ioType = ioType; this.ioType = ioType;
this.argumentType = argumentType; this.argumentType = argumentType;
this.fullName = fullName; this.fullName = fullName;
@ -277,4 +284,14 @@ public class ArgumentDefinition {
String validation = (String)CommandLineUtils.getValue(annotation, "validation"); String validation = (String)CommandLineUtils.getValue(annotation, "validation");
return validation.trim().length() > 0 ? validation.trim() : null; return validation.trim().length() > 0 ? validation.trim() : null;
} }
/**
* Make sure the argument's name is valid
*
* @param name
*/
private void validateName(final String name) {
if ( name != null && name.startsWith("-") )
throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -");
}
} }

View File

@ -55,10 +55,8 @@ public class ArgumentDefinitionGroup implements Iterable<ArgumentDefinition> {
* Does the name of this argument group match the name of another? * Does the name of this argument group match the name of another?
*/ */
public boolean groupNameMatches( ArgumentDefinitionGroup other ) { public boolean groupNameMatches( ArgumentDefinitionGroup other ) {
if( this.groupName == null && other.groupName == null ) if( this.groupName == null )
return true; return other.groupName == null;
if( this.groupName == null && other.groupName != null )
return false;
return this.groupName.equals(other.groupName); return this.groupName.equals(other.groupName);
} }

View File

@ -53,7 +53,7 @@ public abstract class ArgumentTypeDescriptor {
/** /**
* our log, which we want to capture anything from org.broadinstitute.sting * our log, which we want to capture anything from org.broadinstitute.sting
*/ */
protected static Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class); protected static final Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
/** /**
* Fetch the given descriptor from the descriptor repository. * Fetch the given descriptor from the descriptor repository.

View File

@ -120,8 +120,8 @@ public abstract class ParsingMethod {
*/ */
private static final String TAG_TEXT = "[\\w\\-\\.\\=]*"; private static final String TAG_TEXT = "[\\w\\-\\.\\=]*";
public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), public static final ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.FullNameDefinitionMatcher) {}; ArgumentDefinitions.FullNameDefinitionMatcher) {};
public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), public static final ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.ShortNameDefinitionMatcher) {}; ArgumentDefinitions.ShortNameDefinitionMatcher) {};
} }

View File

@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) { getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
if ( getArgumentCollection().gatkKeyFile == null ) { if ( getArgumentCollection().gatkKeyFile == null ) {
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " + throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + "Please see " + GATKRunReport.PHONE_HOME_DOCS_URL +
"for more information and instructions on how to obtain a key."); " for more information and instructions on how to obtain a key.");
} }
else { else {
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey(); PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();

View File

@ -130,6 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing // can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 ) if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage())); exitSystemWithUserError(new UserException(t.getCause().getMessage()));
} }

View File

@ -233,10 +233,6 @@ public class GenomeAnalysisEngine {
if (args.nonDeterministicRandomSeed) if (args.nonDeterministicRandomSeed)
resetRandomGenerator(System.currentTimeMillis()); resetRandomGenerator(System.currentTimeMillis());
// TODO -- REMOVE ME WHEN WE STOP BCF testing
if ( args.USE_SLOW_GENOTYPES )
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
// if the use specified an input BQSR recalibration table then enable on the fly recalibration // if the use specified an input BQSR recalibration table then enable on the fly recalibration
if (args.BQSR_RECAL_FILE != null) if (args.BQSR_RECAL_FILE != null)
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals); setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
@ -797,6 +793,14 @@ public class GenomeAnalysisEngine {
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
if (argCollection.keepProgramRecords)
removeProgramRecords = false;
return new SAMDataSource( return new SAMDataSource(
samReaderIDs, samReaderIDs,
threadAllocation, threadAllocation,
@ -813,7 +817,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(), getWalkerBAQQualityMode(),
refReader, refReader,
getBaseRecalibration(), getBaseRecalibration(),
argCollection.defaultBaseQualities); argCollection.defaultBaseQualities,
removeProgramRecords);
} }
/** /**
@ -840,20 +845,9 @@ public class GenomeAnalysisEngine {
SAMSequenceDictionary sequenceDictionary, SAMSequenceDictionary sequenceDictionary,
GenomeLocParser genomeLocParser, GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) { ValidationExclusion.TYPE validationExclusionType) {
VCFHeader header = null; final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType);
if ( getArguments().repairVCFHeader != null ) {
try {
final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader));
header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue();
pbs.close();
} catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
}
}
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType); final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles) for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
dataSources.add(new ReferenceOrderedDataSource(fileDescriptor, dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
builder, builder,

View File

@ -57,8 +57,6 @@ public class GATKArgumentCollection {
public GATKArgumentCollection() { public GATKArgumentCollection() {
} }
public Map<String, String> walkerArgs = new HashMap<String, String>();
// parameters and their defaults // parameters and their defaults
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>(); public List<String> samFiles = new ArrayList<String>();
@ -66,10 +64,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null; public Integer readBufferSize = null;
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public File gatkKeyFile = null; public File gatkKeyFile = null;
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false) @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
@ -249,6 +247,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
public boolean removeProgramRecords = false;
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
public boolean keepProgramRecords = false;
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe; public ValidationExclusion.TYPE unsafe;
@ -375,19 +379,5 @@ public class GATKArgumentCollection {
@Hidden @Hidden
public boolean generateShadowBCF = false; public boolean generateShadowBCF = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
@Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false)
@Hidden
public boolean USE_SLOW_GENOTYPES = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
/**
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
* VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these
* to fill in any missing ones in input VCF files.
*/
@Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false)
public File repairVCFHeader = null;
} }

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
/**
 * A collection of command-line arguments that are common to the various variant
 * callers (e.g. the UnifiedGenotyper). Pulled out into its own class so that
 * every caller isn't exposed to the arguments from every other caller.
 *
 * User: rpoplin
 * Date: 8/20/12
 */
public class StandardCallerArgumentCollection {
/**
 * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
 * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
 */
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
/**
 * Controls how alternate alleles are determined: discovered from the read data (DISCOVERY, the default)
 * or restricted to a provided set (see the --alleles binding below).
 */
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
/**
 * Controls which type of calls are emitted; defaults to emitting variant sites only
 * (see UnifiedGenotyperEngine.OUTPUT_MODE for the available modes).
 */
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
/**
 * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
 * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
 * is the default).
 */
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
/**
 * This argument allows you to emit low quality calls as filtered records: sites whose confidence falls
 * between this threshold and the calling threshold are emitted but marked as filtered (LowQual).
 */
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/**
 * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provided by this rod binding.
 */
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles;
/**
 * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN_ALLELES),
 * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
 * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
 * that you not play around with this parameter.
 */
@Advanced
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 3;
}

View File

@ -118,7 +118,7 @@ class WindowedData {
rec.getAlignmentStart(), rec.getAlignmentStart(),
stop); stop);
states = new ArrayList<RMDDataState>(); states = new ArrayList<RMDDataState>();
if (provider != null && provider.getReferenceOrderedData() != null) if (provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(range))); states.add(new RMDDataState(dataSource, dataSource.seek(range)));
} }

View File

@ -89,6 +89,11 @@ public class SAMDataSource {
*/ */
private final SAMFileReader.ValidationStringency validationStringency; private final SAMFileReader.ValidationStringency validationStringency;
/**
* Do we want to remove the program records from this data source?
*/
private final boolean removeProgramRecords;
/** /**
* Store BAM indices for each reader present. * Store BAM indices for each reader present.
*/ */
@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY, BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ null, // no BAQ
null, // no BQSR null, // no BQSR
(byte) -1); (byte) -1,
false);
} }
/** /**
@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode, BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader, IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier, BaseRecalibration bqsrApplier,
byte defaultBaseQualities) { byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics(); this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null; dispatcher = null;
validationStringency = strictness; validationStringency = strictness;
this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null) if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize); ReadShard.setReadBufferSize(readBufferSize);
else { else {
@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() { private synchronized void createNewResource() {
if(allResources.size() > maxEntries) if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readerIDs, validationStringency); SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers); allResources.add(readers);
availableResources.add(readers); availableResources.add(readers);
} }
@ -777,9 +785,11 @@ public class SAMDataSource {
/** /**
* Derive a new set of readers from the Reads metadata. * Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load. * @param readerIDs reads to load.
* TODO: validationStringency is not used here
* @param validationStringency validation stringency. * @param validationStringency validation stringency.
* @param removeProgramRecords indicate whether to clear program records from the readers
*/ */
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) { public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size(); final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1; int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start(); final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime(); long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) { for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call(); final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) { if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
} }

View File

@ -45,7 +45,6 @@ import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.LinkedList;
import java.util.List; import java.util.List;
/** /**
@ -56,30 +55,31 @@ public class ReferenceDataSource {
private IndexedFastaSequenceFile reference; private IndexedFastaSequenceFile reference;
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class); protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
/** /**
* Create reference data source from fasta file * Create reference data source from fasta file
* @param fastaFile Fasta file to be used as reference * @param fastaFile Fasta file to be used as reference
*/ */
public ReferenceDataSource(File fastaFile) { public ReferenceDataSource(File fastaFile) {
// does the fasta file exist? check that first... // does the fasta file exist? check that first...
if (!fastaFile.exists()) if (!fastaFile.exists())
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist."); throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
File indexFile = new File(fastaFile.getAbsolutePath() + ".fai"); final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
File dictFile;
if (fastaFile.getAbsolutePath().endsWith("fa")) { final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
} // determine the name for the dict file
else final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict")); final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
/* /*
if index file does not exist, create it manually * if index file does not exist, create it manually
*/ */
if (!indexFile.exists()) { if (!indexFile.exists()) {
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath())); logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true); FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
try { try {
@ -115,6 +115,8 @@ public class ReferenceDataSource {
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary * This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
*/ */
if (!dictFile.exists()) { if (!dictFile.exists()) {
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath())); logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
/* /*

View File

@ -58,7 +58,7 @@ import java.util.Collection;
/** Shards and schedules data in manageable chunks. */ /** Shards and schedules data in manageable chunks. */
public abstract class MicroScheduler implements MicroSchedulerMBean { public abstract class MicroScheduler implements MicroSchedulerMBean {
protected static Logger logger = Logger.getLogger(MicroScheduler.class); protected static final Logger logger = Logger.getLogger(MicroScheduler.class);
/** /**
* Counts the number of instances of the class that are currently alive. * Counts the number of instances of the class that are currently alive.

View File

@ -66,13 +66,13 @@ public class TreeReducer implements Callable {
* @return Result of the reduce. * @return Result of the reduce.
*/ */
public Object call() { public Object call() {
Object result = null; Object result;
final long startTime = System.currentTimeMillis(); final long startTime = System.currentTimeMillis();
try { try {
if( lhs == null ) if( lhs == null )
result = lhs.get(); result = null;
// todo -- what the hell is this above line? Shouldn't it be the two below? // todo -- what the hell is this above line? Shouldn't it be the two below?
// if( lhs == null ) // if( lhs == null )
// throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) ); // throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) );

View File

@ -29,9 +29,19 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import java.util.Iterator;
/** /**
* Filter out reads with wonky cigar strings. * Filter out reads with wonky cigar strings.
* *
* - No reads with Hard/Soft clips in the middle of the cigar
* - No reads starting with deletions (with or without preceding clips)
* - No reads ending in deletions (with or without follow-up clips)
* - No reads that are fully hard or soft clipped
* - No reads that have consecutive indels in the cigar (II, DD, ID or DI)
*
* ps: apparently an empty cigar is okay...
*
* @author ebanks * @author ebanks
* @version 0.1 * @version 0.1
*/ */
@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter {
public boolean filterOut(final SAMRecord rec) { public boolean filterOut(final SAMRecord rec) {
final Cigar c = rec.getCigar(); final Cigar c = rec.getCigar();
if( c.isEmpty() ) { return false; } // if there is no Cigar then it can't be bad
boolean previousElementWasIndel = false; // if there is no Cigar then it can't be bad
CigarOperator lastOp = c.getCigarElement(0).getOperator(); if( c.isEmpty() ) {
return false;
}
if (lastOp == CigarOperator.D) // filter out reads starting with deletion Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();
CigarOperator firstOp = CigarOperator.H;
while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) {
CigarOperator op = elementIterator.next().getOperator();
// No reads with Hard/Soft clips in the middle of the cigar
if (firstOp != CigarOperator.H && op == CigarOperator.H) {
return true;
}
firstOp = op;
}
// No reads starting with deletions (with or without preceding clips)
if (firstOp == CigarOperator.D) {
return true;
}
boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S);
boolean previousElementWasIndel = firstOp == CigarOperator.I;
CigarOperator lastOp = firstOp;
CigarOperator previousOp = firstOp;
while (elementIterator.hasNext()) {
CigarOperator op = elementIterator.next().getOperator();
if (op != CigarOperator.S && op != CigarOperator.H) {
// No reads with Hard/Soft clips in the middle of the cigar
if (previousOp == CigarOperator.S || previousOp == CigarOperator.H)
return true; return true;
for (CigarElement ce : c.getCigarElements()) { lastOp = op;
CigarOperator op = ce.getOperator();
if (op == CigarOperator.D || op == CigarOperator.I) {
if (previousElementWasIndel)
return true; // filter out reads with adjacent I/D
if (!hasMeaningfulElements && op.consumesReadBases()) {
hasMeaningfulElements = true;
}
if (op == CigarOperator.I || op == CigarOperator.D) {
// No reads that have consecutive indels in the cigar (II, DD, ID or DI)
if (previousElementWasIndel) {
return true;
}
previousElementWasIndel = true; previousElementWasIndel = true;
} }
else // this is a regular base (match/mismatch/hard or soft clip) else {
previousElementWasIndel = false; // reset the previous element previousElementWasIndel = false;
}
lastOp = op; }
// No reads with Hard/Soft clips in the middle of the cigar
else if (op == CigarOperator.S && previousOp == CigarOperator.H) {
return true;
} }
return lastOp == CigarOperator.D; previousOp = op;
}
// No reads ending in deletions (with or without follow-up clips)
// No reads that are fully hard or soft clipped
return lastOp == CigarOperator.D || !hasMeaningfulElements;
} }
} }

View File

@ -119,7 +119,7 @@ public class ThreadLocalOutputTracker extends OutputTracker {
try { try {
tempFile = File.createTempFile( stub.getClass().getName(), null ); tempFile = File.createTempFile( stub.getClass().getName(), null );
tempFile.deleteOnExit(); //tempFile.deleteOnExit();
} }
catch( IOException ex ) { catch( IOException ex ) {
throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() ); throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() );

View File

@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
if (stub.getGenerateMD5()) if (stub.getGenerateMD5())
factory.setCreateMd5File(true); factory.setCreateMd5File(true);
// Adjust max records in RAM. // Adjust max records in RAM.
// TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
if(stub.getMaxRecordsInRam() != null) if(stub.getMaxRecordsInRam() != null)
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam()); factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());

View File

@ -27,9 +27,10 @@ package org.broadinstitute.sting.gatk.io.storage;
import net.sf.samtools.util.BlockCompressedOutputStream; import net.sf.samtools.util.BlockCompressedOutputStream;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -60,6 +61,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
protected final File file; protected final File file;
protected OutputStream stream; protected OutputStream stream;
protected final VariantContextWriter writer; protected final VariantContextWriter writer;
boolean closed = false;
/** /**
* Constructs an object which will write directly into the output file provided by the stub. * Constructs an object which will write directly into the output file provided by the stub.
@ -81,6 +83,18 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
} }
/**
* Constructs an object which will redirect into a different file.
* @param stub Stub to use when synthesizing file / header info.
* @param tempFile File into which to direct the output data.
*/
public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
logger.debug("Creating temporary output file " + tempFile.getAbsolutePath() + " for VariantContext output.");
this.file = tempFile;
this.writer = vcfWriterToFile(stub, file, false);
writer.writeHeader(stub.getVCFHeader());
}
/** /**
* common initialization routine for multiple constructors * common initialization routine for multiple constructors
* @param stub Stub to use when constructing the output file. * @param stub Stub to use when constructing the output file.
@ -139,19 +153,6 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
} }
} }
/**
* Constructs an object which will redirect into a different file.
* @param stub Stub to use when synthesizing file / header info.
* @param tempFile File into which to direct the output data.
*/
public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
logger.debug("Creating temporary VCF file " + tempFile.getAbsolutePath() + " for VCF output.");
this.file = tempFile;
this.writer = vcfWriterToFile(stub, file, false);
writer.writeHeader(stub.getVCFHeader());
}
public void add(VariantContext vc) { public void add(VariantContext vc) {
writer.add(vc); writer.add(vc);
} }
@ -172,20 +173,34 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
if(file != null) if(file != null)
logger.debug("Closing temporary file " + file.getAbsolutePath()); logger.debug("Closing temporary file " + file.getAbsolutePath());
writer.close(); writer.close();
closed = true;
} }
public void mergeInto(VariantContextWriterStorage target) { public void mergeInto(VariantContextWriterStorage target) {
try { try {
String sourceFilePath = file.getAbsolutePath(); if ( ! closed )
String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin"; throw new ReviewedStingException("Writer not closed, but we are merging into the file!");
logger.debug(String.format("Merging %s into %s",sourceFilePath,targetFilePath)); final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false); logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath));
for ( VariantContext vc : source.iterator() ) { // use the feature manager to determine the right codec for the tmp file
// that way we don't assume it's a specific type
final FeatureManager.FeatureDescriptor fd = new FeatureManager().getByFiletype(file);
if ( fd == null )
throw new ReviewedStingException("Unexpectedly couldn't find valid codec for temporary output file " + file);
final FeatureCodec<VariantContext> codec = fd.getCodec();
final AbstractFeatureReader<VariantContext> source =
AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), codec, false);
for ( final VariantContext vc : source.iterator() ) {
target.writer.add(vc); target.writer.add(vc);
} }
source.close(); source.close();
file.delete(); // this should be last to aid in debugging when the process fails
} catch (UserException e) {
throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
} catch (IOException e) { } catch (IOException e) {
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e); throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
} }

View File

@ -47,6 +47,7 @@ import java.util.List;
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header"; public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
public static final String SITES_ONLY_ARG_NAME = "sites_only"; public static final String SITES_ONLY_ARG_NAME = "sites_only";
public static final String FORCE_BCF = "bcf";
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>(); public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
// //
@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) { public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition()); return Arrays.asList(
createDefaultArgumentDefinition(source),
createNoCommandLineHeaderArgumentDefinition(),
createSitesOnlyArgumentDefinition(),
createBCFArgumentDefinition() );
} }
/** /**
@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired()) if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false); VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
engine.addOutput(stub); engine.addOutput(stub);
return stub; return stub;
} }
@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if(writerFile == null && !source.isRequired()) if(writerFile == null && !source.isRequired())
throw new MissingArgumentValueException(defaultArgumentDefinition); throw new MissingArgumentValueException(defaultArgumentDefinition);
// Should we compress the output stream?
boolean compress = isCompressed(writerFileName);
boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches);
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
// Create a stub for the given object. // Create a stub for the given object.
VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes) final VariantContextWriterStub stub = (writerFile != null)
: new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes); ? new VariantContextWriterStub(engine, writerFile, argumentSources)
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
stub.setCompressed(isCompressed(writerFileName));
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
// WARNING: Side effects required by engine! // WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches)); parsingEngine.addTags(stub,getArgumentTags(matches));
@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
/** /**
* Creates the optional compression level argument for the BAM file. * Creates the optional no_header argument for the VCF file.
* @return Argument definition for the BAM file itself. Will not be null. * @return Argument definition for the VCF file itself. Will not be null.
*/ */
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() { private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT, return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
/** /**
* Creates the optional compression level argument for the BAM file. * Creates the optional sites_only argument definition
* @return Argument definition for the BAM file itself. Will not be null. * @return Argument definition for the VCF file itself. Will not be null.
*/ */
private ArgumentDefinition createSitesOnlyArgumentDefinition() { private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT, return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
null ); null );
} }
/**
* Creates the optional bcf argument definition
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createBCFArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
FORCE_BCF,
FORCE_BCF,
"force BCF output, regardless of the file's extension",
false,
true,
false,
true,
null,
null,
null,
null );
}
/** /**
* Returns true if the file will be compressed. * Returns true if the file will be compressed.
* @param writerFileName Name of the file * @param writerFileName Name of the file

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.Options; import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File; import java.io.File;
import java.io.OutputStream; import java.io.OutputStream;
@ -78,7 +79,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/** /**
* Should we emit a compressed output stream? * Should we emit a compressed output stream?
*/ */
private final boolean isCompressed; private boolean isCompressed = false;
/** /**
* A hack: push the argument sources into the VCF header so that the VCF header * A hack: push the argument sources into the VCF header so that the VCF header
@ -89,12 +90,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/** /**
* Should the header be written out? A hidden argument. * Should the header be written out? A hidden argument.
*/ */
private final boolean skipWritingCommandLineHeader; private boolean skipWritingCommandLineHeader = false;
/** /**
* Should we not write genotypes even when provided? * Should we not write genotypes even when provided?
*/ */
private final boolean doNotWriteGenotypes; private boolean doNotWriteGenotypes = false;
/**
* Should we force BCF writing regardless of the file extension?
*/
private boolean forceBCF = false;
/** /**
* Connects this stub with an external stream capable of serving the * Connects this stub with an external stream capable of serving the
@ -107,19 +113,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* *
* @param engine engine. * @param engine engine.
* @param genotypeFile file to (ultimately) create. * @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources. * @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/ */
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection<Object> argumentSources) {
this.engine = engine; this.engine = engine;
this.genotypeFile = genotypeFile; this.genotypeFile = genotypeFile;
this.genotypeStream = null; this.genotypeStream = null;
this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -127,19 +127,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* *
* @param engine engine. * @param engine engine.
* @param genotypeStream stream to (ultimately) write. * @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources. * @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/ */
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection<Object> argumentSources) {
this.engine = engine; this.engine = engine;
this.genotypeFile = null; this.genotypeFile = null;
this.genotypeStream = new PrintStream(genotypeStream); this.genotypeStream = new PrintStream(genotypeStream);
this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -166,6 +160,22 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
return isCompressed; return isCompressed;
} }
public void setCompressed(boolean compressed) {
isCompressed = compressed;
}
public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) {
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
}
public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) {
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
public void setForceBCF(boolean forceBCF) {
this.forceBCF = forceBCF;
}
/** /**
* Gets the master sequence dictionary from the engine associated with this stub * Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary * @link GenomeAnalysisEngine.getMasterSequenceDictionary
@ -186,6 +196,9 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER); if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY); if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options); return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
} }

View File

@ -159,7 +159,7 @@ public class LocusIteratorByState extends LocusIterator {
return stepForwardOnGenome(); return stepForwardOnGenome();
} else { } else {
if (curElement != null && curElement.getOperator() == CigarOperator.D) if (curElement != null && curElement.getOperator() == CigarOperator.D)
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar"); throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
// Reads that contain indels model the genomeOffset as the following base in the reference. Because // Reads that contain indels model the genomeOffset as the following base in the reference. Because
// we fall into this else block only when indels end the read, increment genomeOffset such that the // we fall into this else block only when indels end the read, increment genomeOffset such that the
@ -185,7 +185,7 @@ public class LocusIteratorByState extends LocusIterator {
break; break;
case D: // deletion w.r.t. the reference case D: // deletion w.r.t. the reference
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar"); throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
// should be the same as N case // should be the same as N case
genomeOffset++; genomeOffset++;
done = true; done = true;
@ -195,6 +195,8 @@ public class LocusIteratorByState extends LocusIterator {
done = true; done = true;
break; break;
case M: case M:
case EQ:
case X:
readOffset++; readOffset++;
genomeOffset++; genomeOffset++;
done = true; done = true;
@ -279,7 +281,6 @@ public class LocusIteratorByState extends LocusIterator {
*/ */
private void lazyLoadNextAlignmentContext() { private void lazyLoadNextAlignmentContext() {
while (nextAlignmentContext == null && readStates.hasNext()) { while (nextAlignmentContext == null && readStates.hasNext()) {
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
readStates.collectPendingReads(); readStates.collectPendingReads();
final GenomeLoc location = getLocation(); final GenomeLoc location = getLocation();
@ -378,7 +379,7 @@ public class LocusIteratorByState extends LocusIterator {
CigarOperator op = state.stepForwardOnGenome(); CigarOperator op = state.stepForwardOnGenome();
if (op == null) { if (op == null) {
// we discard the read only when we are past its end AND indel at the end of the read (if any) was // we discard the read only when we are past its end AND indel at the end of the read (if any) was
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
it.remove(); // we've stepped off the end of the object it.remove(); // we've stepped off the end of the object
} }

View File

@ -86,13 +86,14 @@ public class GATKRunReport {
private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE"); private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
// number of milliseconds before the S3 put operation is timed-out: // number of milliseconds before the S3 put operation is timed-out:
private static final long S3PutTimeOut = 30 * 1000; private static final long S3PutTimeOut = 10 * 1000;
public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest";
/** /**
* our log * our log
*/ */
protected static Logger logger = Logger.getLogger(GATKRunReport.class); protected static final Logger logger = Logger.getLogger(GATKRunReport.class);
@Element(required = false, name = "id") @Element(required = false, name = "id")

View File

@ -163,43 +163,58 @@ public class VariantContextAdaptors {
@Override @Override
public VariantContext convert(String name, Object input, ReferenceContext ref) { public VariantContext convert(String name, Object input, ReferenceContext ref) {
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input; OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
boolean sawNullAllele = refAllele.isNull();
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
Allele altAllele = Allele.create(alt, false);
alleles.add(altAllele);
if ( altAllele.isNull() )
sawNullAllele = true;
}
Map<String, Object> attributes = new HashMap<String, Object>();
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 ) if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
final byte refBaseForIndel = ref.getBases()[index];
boolean addPaddingBase;
if ( isSNP(dbsnp) || isMNP(dbsnp) )
addPaddingBase = false;
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
else
return null; // can't handle anything else
Allele refAllele;
if ( dbsnp.getNCBIRefBase().equals("-") )
refAllele = Allele.create(refBaseForIndel, true);
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
else
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( Allele.wouldBeNullAllele(alt.getBytes()))
alt = "";
else if ( ! Allele.acceptableAlleleBases(alt) )
return null;
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
}
final VariantContextBuilder builder = new VariantContextBuilder(); final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID()); builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0)); builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
builder.alleles(alleles); builder.alleles(alleles);
builder.referenceBaseForIndel(refBaseForIndel);
return builder.make(); return builder.make();
} else }
return null; // can't handle anything else
private static List<String> stripNullDashes(final List<String> alleles) {
final List<String> newAlleles = new ArrayList<String>(alleles.size());
for ( final String allele : alleles ) {
if ( allele.equals("-") )
newAlleles.add("");
else
newAlleles.add(allele);
}
return newAlleles;
} }
} }
@ -294,7 +309,6 @@ public class VariantContextAdaptors {
int index = hapmap.getStart() - ref.getWindow().getStart(); int index = hapmap.getStart() - ref.getWindow().getStart();
if ( index < 0 ) if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
HashSet<Allele> alleles = new HashSet<Allele>(); HashSet<Allele> alleles = new HashSet<Allele>();
Allele refSNPAllele = Allele.create(ref.getBase(), true); Allele refSNPAllele = Allele.create(ref.getBase(), true);
@ -351,7 +365,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd(); long end = hapmap.getEnd();
if ( deletionLength > 0 ) if ( deletionLength > 0 )
end += deletionLength; end += deletionLength;
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make(); VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
return vc; return vc;
} }
} }

View File

@ -85,18 +85,16 @@ public class FeatureManager {
private final PluginManager<FeatureCodec> pluginManager; private final PluginManager<FeatureCodec> pluginManager;
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>(); private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
private final VCFHeader headerForRepairs;
private final boolean lenientVCFProcessing; private final boolean lenientVCFProcessing;
/** /**
* Construct a FeatureManager without a master VCF header * Construct a FeatureManager without a master VCF header
*/ */
public FeatureManager() { public FeatureManager() {
this(null, false); this(false);
} }
public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) { public FeatureManager(final boolean lenientVCFProcessing) {
this.headerForRepairs = headerForRepairs;
this.lenientVCFProcessing = lenientVCFProcessing; this.lenientVCFProcessing = lenientVCFProcessing;
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec"); pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
@ -255,8 +253,6 @@ public class FeatureManager {
((NameAwareCodec)codex).setName(name); ((NameAwareCodec)codex).setName(name);
if ( codex instanceof ReferenceDependentFeatureCodec ) if ( codex instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
if ( codex instanceof VCFCodec )
((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing ) if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
((AbstractVCFCodec)codex).disableOnTheFlyModifications(); ((AbstractVCFCodec)codex).disableOnTheFlyModifications();

View File

@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
* please talk through your approach with the SE team. * please talk through your approach with the SE team.
* @param dict Sequence dictionary to use. * @param dict Sequence dictionary to use.
* @param genomeLocParser Location parser to use. * @param genomeLocParser Location parser to use.
* @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification. * @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
*/ */
public RMDTrackBuilder(final SAMSequenceDictionary dict, public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser, final GenomeLocParser genomeLocParser,
final VCFHeader headerForRepairs,
ValidationExclusion.TYPE validationExclusionType) { ValidationExclusion.TYPE validationExclusionType) {
this.dict = dict; this.dict = dict;
this.validationExclusionType = validationExclusionType; this.validationExclusionType = validationExclusionType;
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)); this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
} }
/** /**
@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
return featureManager; return featureManager;
} }
/**
* Same as full constructor but makes one without a header for repairs
* @param dict
* @param genomeLocParser
* @param validationExclusionType
*/
public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
this(dict, genomeLocParser, null, validationExclusionType);
}
/** /**
* create a RMDTrack of the specified type * create a RMDTrack of the specified type
* *

View File

@ -89,9 +89,9 @@ public class GATKReport {
reader = new BufferedReader(new FileReader(file)); reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine(); reportHeader = reader.readLine();
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file); throw new UserException.CouldNotReadInputFile(file, "it does not exist");
} catch (IOException e) { } catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file); throw new UserException.CouldNotReadInputFile(file, e);
} }

View File

@ -207,6 +207,18 @@ public class GATKReportTable {
rowIdToIndex = new HashMap<Object, Integer>(); rowIdToIndex = new HashMap<Object, Integer>();
} }
/**
* Create a new GATKReportTable with the same structure
* @param tableToCopy
*/
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID);
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
addColumn(column.getColumnName(), column.getFormat());
if ( copyData )
throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported");
}
/** /**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
* *
@ -490,6 +502,17 @@ public class GATKReportTable {
return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName)); return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName));
} }
/**
* Get a value from the given position in the table
*
* @param rowIndex the row ID
* @param columnName the name of the column
* @return the value stored at the specified position in the table
*/
public Object get(final int rowIndex, final String columnName) {
return get(rowIndex, columnNameToIndex.get(columnName));
}
/** /**
* Get a value from the given position in the table * Get a value from the given position in the table
* *

View File

@ -62,54 +62,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
} }
/**
* Simple utility class that makes it convenient to print unit adjusted times
*/
private static class MyTime {
double t; // in Seconds
int precision; // for format
public MyTime(double t, int precision) {
this.t = t;
this.precision = precision;
}
public MyTime(double t) {
this(t, 1);
}
/**
* Instead of 10000 s, returns 2.8 hours
* @return
*/
public String toString() {
double unitTime = t;
String unit = "s";
if ( t > 120 ) {
unitTime = t / 60; // minutes
unit = "m";
if ( unitTime > 120 ) {
unitTime /= 60; // hours
unit = "h";
if ( unitTime > 100 ) {
unitTime /= 24; // days
unit = "d";
if ( unitTime > 20 ) {
unitTime /= 7; // days
unit = "w";
}
}
}
}
return String.format("%6."+precision+"f %s", unitTime, unit);
}
}
/** lock object to sure updates to history are consistent across threads */ /** lock object to sure updates to history are consistent across threads */
private static final Object lock = new Object(); private static final Object lock = new Object();
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>(); LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
@ -140,7 +92,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
GenomeLocSortedSet targetIntervals = null; GenomeLocSortedSet targetIntervals = null;
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraversalEngine.class); protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
protected GenomeAnalysisEngine engine; protected GenomeAnalysisEngine engine;
@ -280,20 +232,20 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
ProcessingHistory last = updateHistory(loc,cumulativeMetrics); ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
final MyTime elapsed = new MyTime(last.elapsedSeconds); final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds);
final MyTime bpRate = new MyTime(secondsPerMillionBP(last)); final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last));
final MyTime unitRate = new MyTime(secondsPerMillionElements(last)); final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last));
final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last); final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted); final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted);
final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t); final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds());
if ( printProgress ) { if ( printProgress ) {
lastProgressPrintTime = curTime; lastProgressPrintTime = curTime;
// dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates // dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS ) if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS ) else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
else else
PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
@ -308,8 +260,9 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
lastPerformanceLogPrintTime = curTime; lastPerformanceLogPrintTime = curTime;
synchronized(performanceLogLock) { synchronized(performanceLogLock) {
performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n", performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n",
elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t, elapsed.getTimeInSeconds(), nRecords, unitRate.getTimeInSeconds(), last.bpProcessed,
fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t); bpRate.getTimeInSeconds(), fractionGenomeTargetCompleted, estTotalRuntime.getTimeInSeconds(),
timeToCompletion.getTimeInSeconds());
} }
} }
} }
@ -401,7 +354,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
synchronized(performanceLogLock) { synchronized(performanceLogLock) {
// Ignore multiple calls to reset the same lock. // Ignore multiple calls to reset the same lock.
if(performanceLogFile != null && performanceLogFile.equals(fileName)) if(performanceLogFile != null && performanceLogFile.equals(file))
return; return;
// Close an existing log // Close an existing log

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile; import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -28,7 +29,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
/** /**
* our log, which we want to capture anything from this class * our log, which we want to capture anything from this class
*/ */
protected static Logger logger = Logger.getLogger(TraversalEngine.class); protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>(); private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>(); private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
@ -69,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) { for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii); final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) { if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ); profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
profile.add(fakeLoc, isActiveProb);
} }
} }
} }
@ -86,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// Call the walkers isActive function for this locus and add them to the list to be integrated later // Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals == null || initialIntervals.overlaps( location ) ) { if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location); profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
profile.add(location, isActiveProb);
} }
// Grab all the previously unseen reads from this pileup and add them to the massive read list // Grab all the previously unseen reads from this pileup and add them to the massive read list
@ -144,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// //
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker, private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext, final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) { final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) { if ( walker.hasPresetActiveRegions() ) {
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0; return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else { } else {
return walker.isActive( tracker, refContext, locus ); return walker.isActive( tracker, refContext, locus );
} }

View File

@ -19,7 +19,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
/** /**
* our log, which we want to capture anything from this class * our log, which we want to capture anything from this class
*/ */
protected static Logger logger = Logger.getLogger(TraversalEngine.class); protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
@Override @Override
protected String getTraversalType() { protected String getTraversalType() {

View File

@ -24,7 +24,7 @@ import java.util.List;
public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<M,T>,ReadShardDataProvider> { public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReadPairs.class); protected static final Logger logger = Logger.getLogger(TraverseReadPairs.class);
@Override @Override
protected String getTraversalType() { protected String getTraversalType() {

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/ */
public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> { public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReads.class); protected static final Logger logger = Logger.getLogger(TraverseReads.class);
@Override @Override
protected String getTraversalType() { protected String getTraversalType() {
@ -75,8 +75,6 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
if( !dataProvider.hasReads() ) if( !dataProvider.hasReads() )
throw new IllegalArgumentException("Unable to traverse reads; no read data is available."); throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
ReadView reads = new ReadView(dataProvider); ReadView reads = new ReadView(dataProvider);
ReadReferenceView reference = new ReadReferenceView(dataProvider); ReadReferenceView reference = new ReadReferenceView(dataProvider);
@ -91,7 +89,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
ReferenceContext refContext = null; ReferenceContext refContext = null;
// get the array of characters for the reference sequence, since we're a mapped read // get the array of characters for the reference sequence, since we're a mapped read
if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference()) if (!read.getReadUnmappedFlag() && dataProvider.hasReference())
refContext = reference.getReferenceContext(read); refContext = reference.getReferenceContext(read);
// update the number of reads we've seen // update the number of reads we've seen

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -27,10 +28,11 @@ import java.util.List;
*/ */
@By(DataSource.READS) @By(DataSource.READS)
@Requires({DataSource.READS, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ) @PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500) @ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
@ -72,7 +74,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
} }
// Determine probability of active status over the AlignmentContext // Determine probability of active status over the AlignmentContext
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context); public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion // Map over the ActiveRegion
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker); public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);

View File

@ -574,7 +574,7 @@ public class ClipReads extends ReadWalker<ClipReads.ReadClipperWithData, ClipRea
} }
} }
public class ReadClipperWithData extends ReadClipper { public static class ReadClipperWithData extends ReadClipper {
private ClippingData data; private ClippingData data;
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) { public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {

View File

@ -16,8 +16,18 @@ package org.broadinstitute.sting.gatk.walkers;
* Allow user to choose between a number of different data sources. * Allow user to choose between a number of different data sources.
*/ */
public enum DataSource { public enum DataSource {
/**
* Does this walker require read (BAM) data to work?
*/
READS, READS,
/**
* Does this walker require reference data to work?
*/
REFERENCE, REFERENCE,
REFERENCE_BASES, // Do I actually need the reference bases passed to the walker?
/**
* Does this walker require reference order data (VCF) to work?
*/
REFERENCE_ORDERED_DATA REFERENCE_ORDERED_DATA
} }

View File

@ -16,9 +16,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
@By(DataSource.READS) @By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS,DataSource.REFERENCE})
@PartitionBy(PartitionType.LOCUS) @PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -64,9 +64,17 @@ import java.util.List;
*/ */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> { public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
@Output @Output
PrintStream out; PrintStream out;
/**
* In addition to the standard pileup output, adds 'verbose' output too. The verbose output contains the number of spanning deletions,
* and for each read in the pileup it has the read name, offset in the base string, read length, and read mapping quality. These per
* read items are delimited with an '@' character.
*/
@Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output") @Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output")
public boolean SHOW_VERBOSE = false; public boolean SHOW_VERBOSE = false;
@ -116,8 +124,6 @@ public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducib
return rodString; return rodString;
} }
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
private static String createVerboseOutput(final ReadBackedPileup pileup) { private static String createVerboseOutput(final ReadBackedPileup pileup) {
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder();
boolean isFirst = true; boolean isFirst = true;

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
* Time: 2:52:28 PM * Time: 2:52:28 PM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
@Requires({DataSource.READS, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ) @PartitionBy(PartitionType.READ)
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
public boolean requiresOrderedReads() { return false; } public boolean requiresOrderedReads() { return false; }

View File

@ -8,7 +8,7 @@ package org.broadinstitute.sting.gatk.walkers;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
@By(DataSource.REFERENCE) @By(DataSource.REFERENCE)
@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @Requires({DataSource.REFERENCE})
@Allows(DataSource.REFERENCE) @Allows(DataSource.REFERENCE)
public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> { public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
} }

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 8/2/12
* Time: 1:58 PM
* To change this template use File | Settings | File Templates.
*/
import java.lang.annotation.*;
/**
* Indicates that program records should be removed from SAM headers by default for this walker
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface RemoveProgramRecords {
}

View File

@ -65,12 +65,12 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
// by design, first element in LinkedHashMap was ref allele // by design, first element in LinkedHashMap was ref allele
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) { for (Map.Entry<Allele, Double> entry : el.entrySet()) {
if (a.isReference()) if (entry.getKey().isReference())
refLikelihood =el.get(a); refLikelihood = entry.getValue();
else { else {
double like = el.get(a); double like = entry.getValue();
if (like >= altLikelihood) if (like >= altLikelihood)
altLikelihood = like; altLikelihood = like;
} }

View File

@ -22,19 +22,10 @@ import java.util.Map;
/** /**
* Total (unfiltered) depth over all samples. * Total (unfiltered) depth over all samples.
* *
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample * While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal * internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. * over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the * N samples with -dcov D is N * D
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL.
*
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
* -dcov D is N * D
*/ */
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {

View File

@ -24,10 +24,10 @@ import java.util.List;
/** /**
* The depth of coverage of each VCF allele in this sample. * The depth of coverage of each VCF allele in this sample.
* *
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample * The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal * sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. * Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the * REF and ALT fields) is the unfiltered count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times * power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
@ -35,17 +35,13 @@ import java.util.List;
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and * are unambiguously informative about the alternate allele). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base * because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what * assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below). * determine the genotype calls.
*/ */
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static final String REF_ALLELE = "REF";
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) { public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
if ( g == null || !g.isCalled() ) if ( g == null || !g.isCalled() )
return; return;
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
if ( vc.isSNP() ) if ( vc.isSNP() )
annotateSNP(stratifiedContext, vc, gb); annotateSNP(stratifiedContext, vc, gb);
else if ( vc.isIndel() ) else if ( vc.isIndel() )
annotateIndel(stratifiedContext, vc, gb); annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
} }
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>(); HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlleles() ) for ( Allele allele : vc.getAlleles() )
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
gb.AD(counts); gb.AD(counts);
} }
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
ReadBackedPileup pileup = stratifiedContext.getBasePileup(); ReadBackedPileup pileup = stratifiedContext.getBasePileup();
if ( pileup == null ) if ( pileup == null )
return; return;
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>(); final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
alleleCounts.put(REF_ALLELE, 0);
final Allele refAllele = vc.getReference(); final Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) { for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
} }
for ( PileupElement p : pileup ) { for ( PileupElement p : pileup ) {
if ( p.isBeforeInsertion() ) { if ( p.isBeforeInsertion() ) {
final String b = p.getEventBases(); final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
if ( alleleCounts.containsKey(b) ) { if ( alleleCounts.containsKey(insertion) ) {
alleleCounts.put(b, alleleCounts.get(b)+1); alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
} }
} else if ( p.isBeforeDeletionStart() ) { } else if ( p.isBeforeDeletionStart() ) {
if ( p.getEventLength() == refAllele.length() ) { if ( p.getEventLength() == refAllele.length() - 1 ) {
// this is indeed the deletion allele recorded in VC // this is indeed the deletion allele recorded in VC
final String b = DEL; final Allele deletion = Allele.create(refBase);
if ( alleleCounts.containsKey(b) ) { if ( alleleCounts.containsKey(deletion) ) {
alleleCounts.put(b, alleleCounts.get(b)+1); alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
} }
} }
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) { } else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1); alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
} }
} }
int[] counts = new int[alleleCounts.size()]; final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(REF_ALLELE); counts[0] = alleleCounts.get(refAllele);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) ); counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
gb.AD(counts); gb.AD(counts);
} }
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases() // public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); } public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }

View File

@ -291,8 +291,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
int[][] table = new int[2][2]; int[][] table = new int[2][2];
for ( String sample : stratifiedContexts.keySet() ) { for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final AlignmentContext context = stratifiedContexts.get(sample); final AlignmentContext context = sample.getValue();
if ( context == null ) if ( context == null )
continue; continue;
@ -313,12 +313,12 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) { for (Map.Entry<Allele,Double> entry : el.entrySet()) {
if (a.isReference()) if (entry.getKey().isReference())
refLikelihood =el.get(a); refLikelihood = entry.getValue();
else { else {
double like = el.get(a); double like = entry.getValue();
if (like >= altLikelihood) if (like >= altLikelihood)
altLikelihood = like; altLikelihood = like;
} }

View File

@ -103,7 +103,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return map; return map;
} }
private class HaplotypeComparator implements Comparator<Haplotype> { private static class HaplotypeComparator implements Comparator<Haplotype> {
public int compare(Haplotype a, Haplotype b) { public int compare(Haplotype a, Haplotype b) {
if (a.getQualitySum() < b.getQualitySum()) if (a.getQualitySum() < b.getQualitySum())
@ -362,8 +362,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
// Score all the reads in the pileup, even the filtered ones // Score all the reads in the pileup, even the filtered ones
final double[] scores = new double[el.size()]; final double[] scores = new double[el.size()];
int i = 0; int i = 0;
for (Allele a : el.keySet()) { for (Map.Entry<Allele, Double> a : el.entrySet()) {
scores[i++] = -el.get(a); scores[i++] = -a.getValue();
if (DEBUG) { if (DEBUG) {
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]); System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
} }

View File

@ -61,12 +61,12 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
// by design, first element in LinkedHashMap was ref allele // by design, first element in LinkedHashMap was ref allele
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) { for (Map.Entry<Allele,Double> a : el.entrySet()) {
if (a.isReference()) if (a.getKey().isReference())
refLikelihood =el.get(a); refLikelihood = a.getValue();
else { else {
double like = el.get(a); double like = a.getValue();
if (like >= altLikelihood) if (like >= altLikelihood)
altLikelihood = like; altLikelihood = like;
} }

View File

@ -87,11 +87,11 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read
double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele
for (Allele a : el.keySet()) { for (Map.Entry<Allele,Double> a : el.entrySet()) {
if (a.isReference()) if (a.getKey().isReference())
refLikelihood = el.get(a); refLikelihood = a.getValue();
else { else {
double like = el.get(a); double like = a.getValue();
if (like >= altLikelihood) if (like >= altLikelihood)
altLikelihood = like; altLikelihood = like;
} }
@ -100,7 +100,6 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset()); int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
final int numAlignedBases = getNumAlignedBases(p.getRead()); final int numAlignedBases = getNumAlignedBases(p.getRead());
int rp = readPos;
if (readPos > numAlignedBases / 2) { if (readPos > numAlignedBases / 2) {
readPos = numAlignedBases - (readPos + 1); readPos = numAlignedBases - (readPos + 1);
} }

View File

@ -66,8 +66,8 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
return map; return map;
} }
public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY }; protected static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
public static final VCFInfoHeaderLine[] descriptions = { protected static final VCFInfoHeaderLine[] descriptions = {
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"), new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"), new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") }; new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -142,9 +141,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle")); hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle"));
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle")); hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle"));
// Open output file specified by output VCF ROD
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
if ( comp.isBound() ) { if ( comp.isBound() ) {
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
@ -250,8 +246,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
// Beagle always produces genotype strings based on the strings we input in the likelihood file. // Beagle always produces genotype strings based on the strings we input in the likelihood file.
String refString = vc_input.getReference().getDisplayString(); String refString = vc_input.getReference().getDisplayString();
if (refString.length() == 0) // ref was null
refString = Allele.NULL_ALLELE_STRING;
Allele bglAlleleA, bglAlleleB; Allele bglAlleleA, bglAlleleB;

View File

@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
for ( Allele allele : preferredVC.getAlleles() ) { for ( Allele allele : preferredVC.getAlleles() ) {
String bglPrintString; String bglPrintString;
if (allele.isNoCall() || allele.isNull()) if (allele.isNoCall())
bglPrintString = "-"; bglPrintString = "-";
else else
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
@ -351,7 +351,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
} }
public static class CachingFormatter { public static class CachingFormatter {
private int maxCacheSize = 0;
private String format; private String format;
private LRUCache<Double, String> cache; private LRUCache<Double, String> cache;
@ -379,7 +378,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
} }
public CachingFormatter(String format, int maxCacheSize) { public CachingFormatter(String format, int maxCacheSize) {
this.maxCacheSize = maxCacheSize;
this.format = format; this.format = format;
this.cache = new LRUCache<Double, String>(maxCacheSize); this.cache = new LRUCache<Double, String>(maxCacheSize);
} }

View File

@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
// write out the alleles at this site // write out the alleles at this site
for ( Allele allele : vc.getAlleles() ) { for ( Allele allele : vc.getAlleles() ) {
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" "); beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
} }
// write out sample level genotypes // write out sample level genotypes

View File

@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Gatherer; import org.broadinstitute.sting.commandline.Gatherer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer {
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) { if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original"); final File recal_out = new File(output.getName() + ".original");
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport); final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
} }
else if (!RAC.NO_PLOTS) { else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal"); final File recal_out = new File(output.getName() + ".recal");
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
} }
generalReport.output(outputFile); generalReport.output(outputFile);

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -103,7 +107,7 @@ import java.util.ArrayList;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By(DataSource.READS) @By(DataSource.READS)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) // filter out all reads with zero or unavailable mapping quality @Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta @PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> { public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
@ArgumentCollection @ArgumentCollection
@ -132,6 +136,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
*/ */
public void initialize() { public void initialize() {
// TODO -- remove me after the 2.1 release
if ( getToolkit().getArguments().numberOfThreads > 1 )
throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons. We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience.");
// check for unsupported access // check for unsupported access
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals) if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument"); throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
@ -143,12 +151,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
if (RAC.LIST_ONLY) { if (RAC.LIST_ONLY) {
RecalDataManager.listAvailableCovariates(logger); RecalUtils.listAvailableCovariates(logger);
System.exit(0); System.exit(0);
} }
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst(); ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond(); ArrayList<Covariate> optionalCovariates = covariates.getSecond();
@ -222,17 +230,17 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
if (readNotSeen(read)) { if (readNotSeen(read)) {
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true); read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
RecalDataManager.parsePlatformForRead(read, RAC); RecalUtils.parsePlatformForRead(read, RAC);
if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
continue; continue;
} }
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates)); read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
} }
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
RecalDataManager.isColorSpaceConsistent(read, offset)) RecalUtils.isColorSpaceConsistent(read, offset))
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
} }
countedSites++; countedSites++;
@ -242,9 +250,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
} }
/** /**
* Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker. * Initialize the reduce step by returning 0L
* *
* @return returns A PrintStream created from the -recalFile filename argument specified to the walker * @return returns 0L
*/ */
public Long reduceInit() { public Long reduceInit() {
return 0L; return 0L;
@ -271,13 +279,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
public void onTraversalDone(Long result) { public void onTraversalDone(Long result) {
logger.info("Calculating quantized quality scores..."); logger.info("Calculating quantized quality scores...");
quantizeQualityScores(); quantizeQualityScores();
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
if (!RAC.NO_PLOTS) { if (!RAC.NO_PLOTS) {
logger.info("Generating recalibration plots..."); logger.info("Generating recalibration plots...");
generatePlots(); generatePlots();
} }
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
logger.info("Processed: " + result + " sites"); logger.info("Processed: " + result + " sites");
} }
@ -285,10 +296,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE; File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) { if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile); RecalibrationReport report = new RecalibrationReport(recalFile);
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
} }
else else
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
} }
@ -309,7 +320,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created"); throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
} }
RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output); RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
} }
} }

View File

@ -1,109 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jan 6, 2010
*
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class Datum {
long numObservations; // number of bases seen in total
long numMismatches; // number of bases seen that didn't match the reference
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public Datum() {
numObservations = 0L;
numMismatches = 0L;
}
public Datum(long numObservations, long numMismatches) {
this.numObservations = numObservations;
this.numMismatches = numMismatches;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
}
synchronized void increment(final boolean isError) {
numObservations++;
numMismatches += isError ? 1:0;
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
double empiricalQualDouble() {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
byte empiricalQualByte() {
final double doubleMismatches = (double) (numMismatches);
final double doubleObservations = (double) (numObservations);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}
@Override
public boolean equals(Object o) {
if (!(o instanceof Datum))
return false;
Datum other = (Datum) o;
return numMismatches == other.numMismatches && numObservations == other.numObservations;
}
}

View File

@ -1,148 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Random;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum extends Datum {
private static final double UNINITIALIZED = -1.0;
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
numObservations = _numObservations;
numMismatches = _numMismatches;
estimatedQReported = reportedQuality;
empiricalQuality = UNINITIALIZED;
}
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.numObservations;
this.numMismatches = copy.numMismatches;
this.estimatedQReported = copy.estimatedQReported;
this.empiricalQuality = copy.empiricalQuality;
}
public void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
increment(other.numObservations, other.numMismatches);
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
empiricalQuality = UNINITIALIZED;
}
@Override
public void increment(final boolean isError) {
super.increment(isError);
empiricalQuality = UNINITIALIZED;
}
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
protected final void calcEmpiricalQuality() {
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public void setEstimatedQReported(final double estimatedQReported) {
this.estimatedQReported = estimatedQReported;
}
public final double getEstimatedQReported() {
return estimatedQReported;
}
public void setEmpiricalQuality(final double empiricalQuality) {
this.empiricalQuality = empiricalQuality;
}
public final double getEmpiricalQuality() {
if (empiricalQuality == UNINITIALIZED)
calcEmpiricalQuality();
return empiricalQuality;
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}
public String stringForCSV() {
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
}
/**
* We don't compare the estimated quality reported because it may be different when read from
* report tables.
*
* @param o the other recal datum
* @return true if the two recal datums have the same number of observations, errors and empirical quality.
*/
@Override
public boolean equals(Object o) {
if (!(o instanceof RecalDatum))
return false;
RecalDatum other = (RecalDatum) o;
return super.equals(o) &&
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
}
}

View File

@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File; import java.io.File;
import java.util.Collections; import java.util.Collections;
@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
* reads which have had the reference inserted because of color space inconsistencies. * reads which have had the reference inserted because of color space inconsistencies.
*/ */
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO; public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
/** /**
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles * CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
* their color space tag can not be recalibrated. * their color space tag can not be recalibrated.
*/ */
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false) @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
/** /**
* The context covariate will use a context of this size to calculate it's covariate value for base mismatches * The context covariate will use a context of this size to calculate it's covariate value for base mismatches
@ -174,44 +175,44 @@ public class RecalibrationArgumentCollection {
public File recalibrationReport = null; public File recalibrationReport = null;
public GATKReportTable generateReportTable() { public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
argumentsTable.addColumn("Argument"); argumentsTable.addColumn("Argument");
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addRowID("covariate", true); argumentsTable.addRowID("covariate", true);
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
argumentsTable.addRowID("no_standard_covs", true); argumentsTable.addRowID("no_standard_covs", true);
argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.addRowID("run_without_dbsnp", true); argumentsTable.addRowID("run_without_dbsnp", true);
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.addRowID("solid_recal_mode", true); argumentsTable.addRowID("solid_recal_mode", true);
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.addRowID("solid_nocall_strategy", true); argumentsTable.addRowID("solid_nocall_strategy", true);
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.addRowID("mismatches_context_size", true); argumentsTable.addRowID("mismatches_context_size", true);
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("indels_context_size", true); argumentsTable.addRowID("indels_context_size", true);
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true); argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true); argumentsTable.addRowID("insertions_default_quality", true);
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.addRowID("low_quality_tail", true); argumentsTable.addRowID("low_quality_tail", true);
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.addRowID("default_platform", true); argumentsTable.addRowID("default_platform", true);
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.addRowID("force_platform", true); argumentsTable.addRowID("force_platform", true);
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true); argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true); argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true); argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true); argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true); argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable; return argumentsTable;
} }

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.bqsr; package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource; import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;
@ -45,15 +47,17 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
@Output(required = true) @Output(required = true)
private PrintStream out; private PrintStream out;
@Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
private int coverageThreshold = 20;
@Override @Override
// Look to see if the region has sufficient coverage // Look to see if the region has sufficient coverage
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
// note the linear probability scale // note the linear probability scale
int coverageThreshold = 20; return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
return Math.min((double) depth / coverageThreshold, 1);
} }
@ -74,8 +78,8 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
public Long reduce(final GenomeLoc value, Long reduce) { public Long reduce(final GenomeLoc value, Long reduce) {
if (value != null) { if (value != null) {
out.println(value.toString()); out.println(value.toString());
return reduce++; reduce++;
} else }
return reduce; return reduce;
} }

View File

@ -264,7 +264,7 @@ class SampleStatistics {
return false; return false;
// different contigs // different contigs
if (read.getMateReferenceIndex() != read.getReferenceIndex()) if (!read.getMateReferenceIndex().equals(read.getReferenceIndex()))
return false; return false;
// unmapped // unmapped

View File

@ -104,7 +104,9 @@ public class BAMDiffableReader implements DiffableReader {
InputStream fstream = new BufferedInputStream(new FileInputStream(file)); InputStream fstream = new BufferedInputStream(new FileInputStream(file));
if ( !BlockCompressedInputStream.isValidFile(fstream) ) if ( !BlockCompressedInputStream.isValidFile(fstream) )
return false; return false;
new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length); final BlockCompressedInputStream BCIS = new BlockCompressedInputStream(fstream);
BCIS.read(buffer, 0, BAM_MAGIC.length);
BCIS.close();
return Arrays.equals(buffer, BAM_MAGIC); return Arrays.equals(buffer, BAM_MAGIC);
} catch ( IOException e ) { } catch ( IOException e ) {
return false; return false;

View File

@ -224,7 +224,7 @@ public class DiffNode extends DiffValue {
// X=(A=A B=B C=(D=D)) // X=(A=A B=B C=(D=D))
String[] parts = tree.split("=", 2); String[] parts = tree.split("=", 2);
if ( parts.length != 2 ) if ( parts.length != 2 )
throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts); throw new ReviewedStingException("Unexpected tree structure: " + tree);
String name = parts[0]; String name = parts[0];
String value = parts[1]; String value = parts[1];

View File

@ -90,8 +90,10 @@ public class GATKReportDiffableReader implements DiffableReader {
public boolean canRead(File file) { public boolean canRead(File file) {
try { try {
final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX; final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX;
char[] buff = new char[HEADER.length()]; final char[] buff = new char[HEADER.length()];
new FileReader(file).read(buff, 0, HEADER.length()); final FileReader FR = new FileReader(file);
FR.read(buff, 0, HEADER.length());
FR.close();
String firstLine = new String(buff); String firstLine = new String(buff);
return firstLine.startsWith(HEADER); return firstLine.startsWith(HEADER);
} catch (IOException e) { } catch (IOException e) {

View File

@ -47,7 +47,10 @@ import java.util.List;
* <p> * <p>
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s). * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'. * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
* Note that if there are multiple variants at a site, it takes the first one seen. * Several important notes:
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
* 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order). * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
* *
* <h2>Input</h2> * <h2>Input</h2>
@ -102,16 +105,16 @@ public class FastaAlternateReference extends FastaReference {
String refBase = String.valueOf((char)ref.getBase()); String refBase = String.valueOf((char)ref.getBase());
// Check to see if we have a called snp // Check to see if we have a called snp
for ( VariantContext vc : tracker.getValues(variants) ) { for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
if ( vc.isFiltered() ) if ( vc.isFiltered() )
continue; continue;
if ( vc.isSimpleDeletion()) { if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length(); deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one // delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase); return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) { } else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) { } else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString()); return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} }

View File

@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model { public enum Model {
/** The default model with the best performance in all cases */ /** The default model with the best performance in all cases */
EXACT, EXACT
POOL
} }
protected int N; protected int N;

View File

@ -148,8 +148,8 @@ public class ConsensusAlleleCounter {
boolean foundKey = false; boolean foundKey = false;
// copy of hashmap into temp arrayList // copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>(); ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) { for (Map.Entry<String, Integer> s : consensusIndelStrings.entrySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s))); cList.add(new Pair<String, Integer>(s.getKey(), s.getValue()));
} }
if (read.getAlignmentEnd() == loc.getStart()) { if (read.getAlignmentEnd() == loc.getStart()) {
@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
// get ref bases of accurate deletion // get ref bases of accurate deletion
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart(); final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen; stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
if (Allele.acceptableAlleleBases(refBases, false)) { if (Allele.acceptableAlleleBases(refBases, false)) {
refAllele = Allele.create(refBases, true); refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); altAllele = Allele.create(ref.getBase(), false);
} }
else continue; // don't go on with this allele if refBases are non-standard else continue; // don't go on with this allele if refBases are non-standard
} else { } else {
// insertion case // insertion case
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions final String insertionBases = (char)ref.getBase() + s; // add reference padding
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
altAllele = Allele.create(s, false); refAllele = Allele.create(ref.getBase(), true);
altAllele = Allele.create(insertionBases, false);
stop = loc.getStart(); stop = loc.getStart();
} }
else continue; // go on to next allele if consensus insertion has any non-standard base. else continue; // go on to next allele if consensus insertion has any non-standard base.
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
final VariantContextBuilder builder = new VariantContextBuilder().source(""); final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop); builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(Arrays.asList(refAllele, altAllele)); builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes(); builder.noGenotypes();
if (doMultiAllelicCalls) { if (doMultiAllelicCalls) {
vcs.add(builder.make()); vcs.add(builder.make());

View File

@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model { public enum Model {
SNP, SNP,
INDEL, INDEL,
BOTH, GeneralPloidySNP,
POOLSNP, GeneralPloidyINDEL,
POOLINDEL, BOTH
POOLBOTH
} }
public enum GENOTYPING_MODE { public enum GENOTYPING_MODE {

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -48,7 +47,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false; private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel; private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap = private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() { new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>()); indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
haplotypeMap.clear(); haplotypeMap.clear();
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
alleleList = pair.first;
allelesArePadded = pair.second;
if (alleleList.isEmpty()) if (alleleList.isEmpty())
return null; return null;
} }
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
if (haplotypeMap == null || haplotypeMap.isEmpty()) if (haplotypeMap == null || haplotypeMap.isEmpty())
return null; return null;
// start making the VariantContext // start making the VariantContext
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base. // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
final int eventLength = getEventLength(alleleList); final int eventLength = getEventLength(alleleList);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
// create the genotypes; no-call everyone for now // create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create(); GenotypesContext genotypes = GenotypesContext.create();
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get(); return indelLikelihoodMap.get();
} }
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
Allele refAllele = alleles.get(0);
int endLoc = loc.getStart() + refAllele.length()-1;
if (allelesArePadded)
endLoc++;
return endLoc;
}
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList, public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
final ReferenceContext ref, final ReferenceContext ref,
final GenomeLoc loc, final GenomeLoc loc,
@ -213,7 +198,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker, public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> contexts, final Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType, final AlignmentContextUtils.ReadOrientation contextType,
@ -222,7 +207,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final boolean ignoreSNPAllelesWhenGenotypingIndels) { final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
boolean allelesArePadded = true;
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null; VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
// ignore places where we don't have a variant // ignore places where we don't have a variant
if (vc == null) if (vc == null)
return new Pair<List<Allele>,Boolean>(alleles,false); return alleles;
if (ignoreSNPAllelesWhenGenotypingIndels) { if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} else { } else {
alleles.addAll(vc.getAlleles()); alleles.addAll(vc.getAlleles());
} }
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
} else { } else {
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
} }
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded); return alleles;
} }
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup, // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,

View File

@ -208,7 +208,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements ); return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
} }
public class BAQedPileupElement extends PileupElement { public static class BAQedPileupElement extends PileupElement {
public BAQedPileupElement( final PileupElement PE ) { public BAQedPileupElement( final PileupElement PE ) {
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip()); super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip());
} }

View File

@ -26,11 +26,12 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
public class UnifiedArgumentCollection { public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false) @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
@ -42,13 +43,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
/**
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
*/
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
/** /**
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily * The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
* distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it * distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it
@ -57,26 +51,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false) @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE; public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
/**
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
* is the default).
*/
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
/**
* This argument allows you to emit low quality calls as filtered records.
*/
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/** /**
* Note that calculating the SLOD increases the runtime by an appreciable amount. * Note that calculating the SLOD increases the runtime by an appreciable amount.
*/ */
@ -90,12 +64,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false) @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false; public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
/**
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
*/
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles;
/** /**
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base * The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value. * is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
@ -107,18 +75,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05; public Double MAX_DELETION_FRACTION = 0.05;
/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
* that you not play around with this parameter.
*/
@Advanced
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 3;
@Hidden @Hidden
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false) @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false; public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false;
// indel-related arguments // indel-related arguments
@ -139,19 +97,18 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false) @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25; public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
/** /**
* This argument informs the prior probability of having an indel at a site. * This argument informs the prior probability of having an indel at a site.
*/ */
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false) @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000; public double INDEL_HETEROZYGOSITY = 1.0/8000;
@Hidden @Advanced
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false) @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
public byte INDEL_GAP_CONTINUATION_PENALTY = 10; public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
@Hidden @Advanced
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false) @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
public byte INDEL_GAP_OPEN_PENALTY = 45; public byte INDEL_GAP_OPEN_PENALTY = 45;
@Hidden @Hidden
@ -181,7 +138,6 @@ public class UnifiedArgumentCollection {
Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
sample-level error model sample-level error model
*/ */
@Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false) @Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
public boolean IGNORE_LANE_INFO = false; public boolean IGNORE_LANE_INFO = false;
@ -263,7 +219,6 @@ public class UnifiedArgumentCollection {
uac.referenceSampleName = referenceSampleName; uac.referenceSampleName = referenceSampleName;
uac.samplePloidy = samplePloidy; uac.samplePloidy = samplePloidy;
uac.maxQualityScore = minQualityScore; uac.maxQualityScore = minQualityScore;
uac.maxQualityScore = maxQualityScore;
uac.phredScaledPrior = phredScaledPrior; uac.phredScaledPrior = phredScaledPrior;
uac.minPower = minPower; uac.minPower = minPower;
uac.minReferenceDepth = minReferenceDepth; uac.minReferenceDepth = minReferenceDepth;
@ -276,5 +231,16 @@ public class UnifiedArgumentCollection {
return uac; return uac;
} }
public UnifiedArgumentCollection() { }
public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) {
super();
this.alleles = SCAC.alleles;
this.GenotypingMode = SCAC.GenotypingMode;
this.heterozygosity = SCAC.heterozygosity;
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
this.OutputMode = SCAC.OutputMode;
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
}
} }

View File

@ -82,7 +82,7 @@ import java.util.*;
* -o snps.raw.vcf \ * -o snps.raw.vcf \
* -stand_call_conf [50.0] \ * -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \ * -stand_emit_conf 10.0 \
* -dcov [50] \ * -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
* [-L targets.interval_list] * [-L targets.interval_list]
* </pre> * </pre>
* *
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
} else { } else {
// in full mode: check for consistency in ploidy/pool calling arguments // in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models // check for correct calculation models
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { /* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy requires POOL GL and AF calculation models to be specified right now // polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) { && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2"); throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
} }
*/
// get all of the unique sample names // get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) { if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear(); samples.clear();
@ -311,8 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// add the pool values for each genotype // add the pool values for each genotype
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed, for this pool")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed, for this pool")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
} }
if (UAC.referenceSampleName != null) { if (UAC.referenceSampleName != null) {
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth")); headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));

Some files were not shown because too many files have changed in this diff Show More