Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
a1596791af
|
|
@ -125,8 +125,8 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
*/
|
*/
|
||||||
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
|
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
|
||||||
BitSet result = null;
|
BitSet result = null;
|
||||||
if (offset >= contextSize) {
|
if (offset - contextSize + 1 >= 0) {
|
||||||
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
String context = new String(Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1));
|
||||||
if (!context.contains("N"))
|
if (!context.contains("N"))
|
||||||
result = BitSetUtils.bitSetFrom(context);
|
result = BitSetUtils.bitSetFrom(context);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ public class Datum {
|
||||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
|
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
|
||||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||||
return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
|
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
}
|
}
|
||||||
|
|
||||||
byte empiricalQualByte() {
|
byte empiricalQualByte() {
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
@ -152,7 +151,7 @@ public class RecalDataManager {
|
||||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||||
for (Covariate covariate : requiredCovariates) {
|
for (Covariate covariate : requiredCovariates) {
|
||||||
requiredCovariatesToAdd.add(covariate);
|
requiredCovariatesToAdd.add(covariate);
|
||||||
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
|
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
|
||||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ public class RecalDatum extends Datum {
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void calcCombinedEmpiricalQuality() {
|
public final void calcCombinedEmpiricalQuality() {
|
||||||
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
|
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void calcEstimatedReportedQuality() {
|
public final void calcEstimatedReportedQuality() {
|
||||||
|
|
@ -102,7 +102,7 @@ public class RecalDatum extends Datum {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("%d,%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()), (byte) Math.floor(getEstimatedQReported()));
|
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,13 +55,10 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
||||||
public Double PCR_error = DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering.DEFAULT_PCR_ERROR_RATE;
|
public Double PCR_error = DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering.DEFAULT_PCR_ERROR_RATE;
|
||||||
|
|
||||||
/**
|
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||||
* Specifies how to determine the alternate allele to use for genotyping
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
|
||||||
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
||||||
|
|
||||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
||||||
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -147,11 +144,11 @@ public class UnifiedArgumentCollection {
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
|
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
|
||||||
public double INDEL_GAP_CONTINUATION_PENALTY = 10.0;
|
public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
|
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
|
||||||
public double INDEL_GAP_OPEN_PENALTY = 45.0;
|
public byte INDEL_GAP_OPEN_PENALTY = 45;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
|
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,8 @@ import java.util.*;
|
||||||
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
|
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
|
||||||
@Reference(window=@Window(start=-200,stop=200))
|
@Reference(window=@Window(start=-200,stop=200))
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
|
// TODO -- When LocusIteratorByState gets cleaned up, we should enable multiple @By sources:
|
||||||
|
// TODO -- @By( {DataSource.READS, DataSource.REFERENCE_ORDERED_DATA} )
|
||||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||||
public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatibleWalker {
|
public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatibleWalker {
|
||||||
|
|
||||||
|
|
@ -155,6 +157,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
||||||
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
||||||
protected PrintStream verboseWriter = null;
|
protected PrintStream verboseWriter = null;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
@Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false)
|
@Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false)
|
||||||
protected PrintStream metricsWriter = null;
|
protected PrintStream metricsWriter = null;
|
||||||
|
|
||||||
|
|
@ -347,14 +350,6 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(UGStatistics sum) {
|
public void onTraversalDone(UGStatistics sum) {
|
||||||
logger.info(String.format("Visited bases %d", sum.nBasesVisited));
|
|
||||||
logger.info(String.format("Callable bases %d", sum.nBasesCallable));
|
|
||||||
logger.info(String.format("Confidently called bases %d", sum.nBasesCalledConfidently));
|
|
||||||
logger.info(String.format("%% callable bases of all loci %3.3f", sum.percentCallableOfAll()));
|
|
||||||
logger.info(String.format("%% confidently called bases of all loci %3.3f", sum.percentCalledOfAll()));
|
|
||||||
logger.info(String.format("%% confidently called bases of callable loci %3.3f", sum.percentCalledOfCallable()));
|
|
||||||
logger.info(String.format("Actual calls made %d", sum.nCallsMade));
|
|
||||||
|
|
||||||
if ( metricsWriter != null ) {
|
if ( metricsWriter != null ) {
|
||||||
metricsWriter.println(String.format("Visited bases %d", sum.nBasesVisited));
|
metricsWriter.println(String.format("Visited bases %d", sum.nBasesVisited));
|
||||||
metricsWriter.println(String.format("Callable bases %d", sum.nBasesCallable));
|
metricsWriter.println(String.format("Callable bases %d", sum.nBasesCallable));
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,9 @@ import net.sf.samtools.CigarOperator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.PairHMM;
|
||||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -41,6 +43,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class PairHMMIndelErrorModel {
|
public class PairHMMIndelErrorModel {
|
||||||
|
|
@ -60,12 +63,12 @@ public class PairHMMIndelErrorModel {
|
||||||
private static final int START_HRUN_GAP_IDX = 4;
|
private static final int START_HRUN_GAP_IDX = 4;
|
||||||
private static final int MAX_HRUN_GAP_IDX = 20;
|
private static final int MAX_HRUN_GAP_IDX = 20;
|
||||||
|
|
||||||
private static final double MIN_GAP_OPEN_PENALTY = 30.0;
|
private static final byte MIN_GAP_OPEN_PENALTY = 30;
|
||||||
private static final double MIN_GAP_CONT_PENALTY = 10.0;
|
private static final byte MIN_GAP_CONT_PENALTY = 10;
|
||||||
private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
|
private static final byte GAP_PENALTY_HRUN_STEP = 1; // each increase in hrun decreases gap penalty by this.
|
||||||
|
|
||||||
private final double[] GAP_OPEN_PROB_TABLE;
|
private final byte[] GAP_OPEN_PROB_TABLE;
|
||||||
private final double[] GAP_CONT_PROB_TABLE;
|
private final byte[] GAP_CONT_PROB_TABLE;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Private Member Variables
|
// Private Member Variables
|
||||||
|
|
@ -86,42 +89,42 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean bandedLikelihoods) {
|
public PairHMMIndelErrorModel(byte indelGOP, byte indelGCP, boolean deb, boolean bandedLikelihoods) {
|
||||||
this.DEBUG = deb;
|
this.DEBUG = deb;
|
||||||
this.bandedLikelihoods = bandedLikelihoods;
|
this.bandedLikelihoods = bandedLikelihoods;
|
||||||
|
|
||||||
// fill gap penalty table, affine naive model:
|
// fill gap penalty table, affine naive model:
|
||||||
this.GAP_CONT_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
|
this.GAP_CONT_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];
|
||||||
this.GAP_OPEN_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
|
this.GAP_OPEN_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];
|
||||||
|
|
||||||
double gop = -indelGOP/10.0;
|
|
||||||
double gcp = -indelGCP/10.0;
|
|
||||||
|
|
||||||
for (int i = 0; i < START_HRUN_GAP_IDX; i++) {
|
for (int i = 0; i < START_HRUN_GAP_IDX; i++) {
|
||||||
GAP_OPEN_PROB_TABLE[i] = gop;
|
GAP_OPEN_PROB_TABLE[i] = indelGOP;
|
||||||
GAP_CONT_PROB_TABLE[i] = gcp;
|
GAP_CONT_PROB_TABLE[i] = indelGCP;
|
||||||
}
|
}
|
||||||
|
|
||||||
double step = GAP_PENALTY_HRUN_STEP/10.0;
|
double step = GAP_PENALTY_HRUN_STEP/10.0;
|
||||||
|
|
||||||
double maxGOP = -MIN_GAP_OPEN_PENALTY/10.0; // phred to log prob
|
// initialize gop and gcp to their default values
|
||||||
double maxGCP = -MIN_GAP_CONT_PENALTY/10.0; // phred to log prob
|
byte gop = indelGOP;
|
||||||
|
byte gcp = indelGCP;
|
||||||
|
|
||||||
|
// all of the following is computed in QUal-space
|
||||||
for (int i=START_HRUN_GAP_IDX; i < MAX_HRUN_GAP_IDX; i++) {
|
for (int i=START_HRUN_GAP_IDX; i < MAX_HRUN_GAP_IDX; i++) {
|
||||||
gop += step;
|
gop -= GAP_PENALTY_HRUN_STEP;
|
||||||
if (gop > maxGOP)
|
if (gop < MIN_GAP_OPEN_PENALTY)
|
||||||
gop = maxGOP;
|
gop = MIN_GAP_OPEN_PENALTY;
|
||||||
|
|
||||||
gcp += step;
|
gcp -= step;
|
||||||
if(gcp > maxGCP)
|
if(gcp < MIN_GAP_CONT_PENALTY)
|
||||||
gcp = maxGCP;
|
gcp = MIN_GAP_CONT_PENALTY;
|
||||||
GAP_OPEN_PROB_TABLE[i] = gop;
|
GAP_OPEN_PROB_TABLE[i] = gop;
|
||||||
GAP_CONT_PROB_TABLE[i] = gcp;
|
GAP_CONT_PROB_TABLE[i] = gcp;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
|
static private void getContextHomopolymerLength(final byte[] refBytes, final int[] hrunArray) {
|
||||||
// compute forward hrun length, example:
|
// compute forward hrun length, example:
|
||||||
// AGGTGACCCCCCTGAGAG
|
// AGGTGACCCCCCTGAGAG
|
||||||
// 001000012345000000
|
// 001000012345000000
|
||||||
|
|
@ -154,203 +157,9 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
|
private void fillGapProbabilities(final int[] hrunProfile,
|
||||||
double[] currentGOP, double[] currentGCP, double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
|
final byte[] contextLogGapOpenProbabilities,
|
||||||
if (indI > 0 && indJ > 0) {
|
final byte[] contextLogGapContinuationProbabilities) {
|
||||||
final int im1 = indI -1;
|
|
||||||
final int jm1 = indJ - 1;
|
|
||||||
// update current point
|
|
||||||
final byte x = readBases[im1];
|
|
||||||
final byte y = haplotypeBases[jm1];
|
|
||||||
final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
|
|
||||||
|
|
||||||
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
|
||||||
|
|
||||||
matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});
|
|
||||||
|
|
||||||
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
|
||||||
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
|
||||||
|
|
||||||
XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
|
|
||||||
|
|
||||||
// update Y array
|
|
||||||
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
|
||||||
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
|
||||||
YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
|
|
||||||
double[] currentGOP, double[] currentGCP, int indToStart,
|
|
||||||
double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
|
|
||||||
|
|
||||||
final int X_METRIC_LENGTH = readBases.length+1;
|
|
||||||
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
|
|
||||||
|
|
||||||
if (indToStart == 0) {
|
|
||||||
// default initialization for all arrays
|
|
||||||
|
|
||||||
for (int i=0; i < X_METRIC_LENGTH; i++) {
|
|
||||||
Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY);
|
|
||||||
Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY);
|
|
||||||
Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i=1; i < X_METRIC_LENGTH; i++) {
|
|
||||||
//initialize first column
|
|
||||||
XMetricArray[i][0] = END_GAP_COST*(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int j=1; j < Y_METRIC_LENGTH; j++) {
|
|
||||||
// initialize first row
|
|
||||||
YMetricArray[0][j] = END_GAP_COST*(j);
|
|
||||||
}
|
|
||||||
matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
|
|
||||||
XMetricArray[0][0]= YMetricArray[0][0] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (bandedLikelihoods) {
|
|
||||||
final double DIAG_TOL = 20; // means that max - min element in diags have to be > this number for banding to take effect.
|
|
||||||
|
|
||||||
final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1;
|
|
||||||
final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH);
|
|
||||||
|
|
||||||
int idxWithMaxElement = 0;
|
|
||||||
|
|
||||||
for (int diag=indToStart; diag < numDiags; diag++) {
|
|
||||||
// compute default I and J start positions at edge of diagonals
|
|
||||||
int indI = 0;
|
|
||||||
int indJ = diag;
|
|
||||||
if (diag >= Y_METRIC_LENGTH ) {
|
|
||||||
indI = diag-(Y_METRIC_LENGTH-1);
|
|
||||||
indJ = Y_METRIC_LENGTH-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// first pass: from max element to edge
|
|
||||||
int idxLow = idxWithMaxElement;
|
|
||||||
|
|
||||||
// reset diag max value before starting
|
|
||||||
double maxElementInDiag = Double.NEGATIVE_INFINITY;
|
|
||||||
// set indI, indJ to correct values
|
|
||||||
indI += idxLow;
|
|
||||||
indJ -= idxLow;
|
|
||||||
if (indI >= X_METRIC_LENGTH || indJ < 0) {
|
|
||||||
idxLow--;
|
|
||||||
indI--;
|
|
||||||
indJ++;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
for (int el = idxLow; el < elemsInDiag; el++) {
|
|
||||||
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
|
|
||||||
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
|
|
||||||
// update max in diagonal
|
|
||||||
final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
|
|
||||||
|
|
||||||
// check if we've fallen off diagonal value by threshold
|
|
||||||
if (bestMetric > maxElementInDiag) {
|
|
||||||
maxElementInDiag = bestMetric;
|
|
||||||
idxWithMaxElement = el;
|
|
||||||
}
|
|
||||||
else if (bestMetric < maxElementInDiag - DIAG_TOL && idxWithMaxElement > 0)
|
|
||||||
break; // done w/current diagonal
|
|
||||||
|
|
||||||
indI++;
|
|
||||||
if (indI >=X_METRIC_LENGTH )
|
|
||||||
break;
|
|
||||||
indJ--;
|
|
||||||
if (indJ <= 0)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (idxLow > 0) {
|
|
||||||
// now do second part in opposite direction
|
|
||||||
indI = 0;
|
|
||||||
indJ = diag;
|
|
||||||
if (diag >= Y_METRIC_LENGTH ) {
|
|
||||||
indI = diag-(Y_METRIC_LENGTH-1);
|
|
||||||
indJ = Y_METRIC_LENGTH-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
indI += idxLow-1;
|
|
||||||
indJ -= idxLow-1;
|
|
||||||
for (int el = idxLow-1; el >= 0; el--) {
|
|
||||||
|
|
||||||
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
|
|
||||||
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
|
|
||||||
// update max in diagonal
|
|
||||||
final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
|
|
||||||
|
|
||||||
// check if we've fallen off diagonal value by threshold
|
|
||||||
if (bestMetric > maxElementInDiag) {
|
|
||||||
maxElementInDiag = bestMetric;
|
|
||||||
idxWithMaxElement = el;
|
|
||||||
}
|
|
||||||
else if (bestMetric < maxElementInDiag - DIAG_TOL)
|
|
||||||
break; // done w/current diagonal
|
|
||||||
|
|
||||||
indJ++;
|
|
||||||
if (indJ >= Y_METRIC_LENGTH )
|
|
||||||
break;
|
|
||||||
indI--;
|
|
||||||
if (indI <= 0)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// if (DEBUG)
|
|
||||||
// System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// simplified rectangular version of update loop
|
|
||||||
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
|
|
||||||
for (int indJ=indToStart+1; indJ < Y_METRIC_LENGTH; indJ++) {
|
|
||||||
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
|
|
||||||
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
|
|
||||||
final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] });
|
|
||||||
|
|
||||||
/*
|
|
||||||
if (DEBUG) {
|
|
||||||
PrintStream outx, outy, outm, outs;
|
|
||||||
double[][] sumMetrics = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
|
||||||
try {
|
|
||||||
outx = new PrintStream("datax.txt");
|
|
||||||
outy = new PrintStream("datay.txt");
|
|
||||||
outm = new PrintStream("datam.txt");
|
|
||||||
outs = new PrintStream("datas.txt");
|
|
||||||
double metrics[] = new double[3];
|
|
||||||
for (int indI=0; indI < X_METRIC_LENGTH; indI++) {
|
|
||||||
for (int indJ=0; indJ < Y_METRIC_LENGTH; indJ++) {
|
|
||||||
metrics[0] = matchMetricArray[indI][indJ];
|
|
||||||
metrics[1] = XMetricArray[indI][indJ];
|
|
||||||
metrics[2] = YMetricArray[indI][indJ];
|
|
||||||
//sumMetrics[indI][indJ] = MathUtils.softMax(metrics);
|
|
||||||
outx.format("%4.1f ", metrics[1]);
|
|
||||||
outy.format("%4.1f ", metrics[2]);
|
|
||||||
outm.format("%4.1f ", metrics[0]);
|
|
||||||
outs.format("%4.1f ", MathUtils.softMax(metrics));
|
|
||||||
}
|
|
||||||
outx.println(); outm.println();outy.println(); outs.println();
|
|
||||||
}
|
|
||||||
outm.close(); outx.close(); outy.close();
|
|
||||||
} catch (java.io.IOException e) { throw new UserException("bla");}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
return bestMetric;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private void fillGapProbabilities(int[] hrunProfile,
|
|
||||||
double[] contextLogGapOpenProbabilities, double[] contextLogGapContinuationProbabilities) {
|
|
||||||
// fill based on lookup table
|
// fill based on lookup table
|
||||||
for (int i = 0; i < hrunProfile.length; i++) {
|
for (int i = 0; i < hrunProfile.length; i++) {
|
||||||
if (hrunProfile[i] >= MAX_HRUN_GAP_IDX) {
|
if (hrunProfile[i] >= MAX_HRUN_GAP_IDX) {
|
||||||
|
|
@ -372,27 +181,8 @@ public class PairHMMIndelErrorModel {
|
||||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||||
int readIdx=0;
|
int readIdx=0;
|
||||||
|
|
||||||
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
|
|
||||||
LinkedHashMap<Allele,double[]> gapContProbabilityMap = new LinkedHashMap<Allele,double[]>();
|
|
||||||
|
|
||||||
// will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
|
|
||||||
// todo -- refactor into separate function
|
|
||||||
for (Allele a: haplotypeMap.keySet()) {
|
|
||||||
Haplotype haplotype = haplotypeMap.get(a);
|
|
||||||
byte[] haplotypeBases = haplotype.getBases();
|
|
||||||
double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
|
|
||||||
double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
|
|
||||||
|
|
||||||
// get homopolymer length profile for current haplotype
|
|
||||||
int[] hrunProfile = new int[haplotypeBases.length];
|
|
||||||
getContextHomopolymerLength(haplotypeBases,hrunProfile);
|
|
||||||
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
|
||||||
|
|
||||||
gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
|
|
||||||
gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
PairHMM pairHMM = new PairHMM(bandedLikelihoods);
|
||||||
for (PileupElement p: pileup) {
|
for (PileupElement p: pileup) {
|
||||||
// > 1 when the read is a consensus read representing multiple independent observations
|
// > 1 when the read is a consensus read representing multiple independent observations
|
||||||
readCounts[readIdx] = p.getRepresentativeCount();
|
readCounts[readIdx] = p.getRepresentativeCount();
|
||||||
|
|
@ -406,14 +196,32 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
if (DEBUG) {
|
||||||
|
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
|
||||||
|
}
|
||||||
// System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
|
// System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
|
||||||
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
|
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
|
||||||
|
|
||||||
if (read.isEmpty())
|
if (read.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if(ReadUtils.is454Read(read)) {
|
if (read.getUnclippedEnd() > ref.getWindow().getStop())
|
||||||
|
read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop());
|
||||||
|
|
||||||
|
if (read.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
if (read.getUnclippedStart() < ref.getWindow().getStart())
|
||||||
|
read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart());
|
||||||
|
|
||||||
|
if (read.isEmpty())
|
||||||
|
continue;
|
||||||
|
// hard-clip low quality ends - this may introduce extra H elements in CIGAR string
|
||||||
|
read = ReadClipper.hardClipLowQualEnds(read,(byte)BASE_QUAL_THRESHOLD );
|
||||||
|
|
||||||
|
if (read.isEmpty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
|
||||||
// get bases of candidate haplotypes that overlap with reads
|
// get bases of candidate haplotypes that overlap with reads
|
||||||
final int trailingBases = 3;
|
final int trailingBases = 3;
|
||||||
|
|
@ -469,54 +277,56 @@ public class PairHMMIndelErrorModel {
|
||||||
unclippedReadBases = read.getReadBases();
|
unclippedReadBases = read.getReadBases();
|
||||||
unclippedReadQuals = read.getBaseQualities();
|
unclippedReadQuals = read.getBaseQualities();
|
||||||
|
|
||||||
// Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
|
final int extraOffset = Math.abs(eventLength);
|
||||||
// and may leave a string of Q2 bases still hanging off the reads.
|
|
||||||
for (int i=numStartSoftClippedBases; i < unclippedReadBases.length; i++) {
|
|
||||||
if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD)
|
|
||||||
numStartClippedBases++;
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
|
|
||||||
}
|
/**
|
||||||
for (int i=unclippedReadBases.length-numEndSoftClippedBases-1; i >= 0; i-- ){
|
* Compute genomic locations that candidate haplotypes will span.
|
||||||
if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD)
|
* Read start and stop locations (variables readStart and readEnd) are the original unclipped positions from SAMRecord,
|
||||||
numEndClippedBases++;
|
* adjusted by hard clips from Cigar string and by qual-based soft-clipping performed above.
|
||||||
else
|
* We will propose haplotypes that overlap the read with some padding.
|
||||||
break;
|
* True read start = readStart + numStartClippedBases - ReadUtils.getFirstInsertionOffset(read)
|
||||||
}
|
* Last term is because if a read starts with an insertion then these bases are not accounted for in readStart.
|
||||||
|
* trailingBases is a padding constant(=3) and we additionally add abs(eventLength) to both sides of read to be able to
|
||||||
|
* differentiate context between two haplotypes
|
||||||
|
*/
|
||||||
|
long startLocationInRefForHaplotypes = Math.max(readStart + numStartClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read)-extraOffset, 0);
|
||||||
|
long stopLocationInRefForHaplotypes = readEnd -numEndClippedBases + trailingBases + ReadUtils.getLastInsertionOffset(read)+extraOffset;
|
||||||
|
|
||||||
int extraOffset = Math.abs(eventLength);
|
if (DEBUG)
|
||||||
|
System.out.format("orig Start:%d orig stop: %d\n", startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes);
|
||||||
|
|
||||||
long start = Math.max(readStart + numStartClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read)-extraOffset, 0);
|
|
||||||
long stop = readEnd -numEndClippedBases + trailingBases + ReadUtils.getLastInsertionOffset(read)+extraOffset;
|
|
||||||
|
|
||||||
// Variables start and stop are coordinates (inclusive) where we want to get the haplotype from.
|
|
||||||
int readLength = read.getReadLength()-numStartSoftClippedBases-numEndSoftClippedBases;
|
int readLength = read.getReadLength()-numStartSoftClippedBases-numEndSoftClippedBases;
|
||||||
// check if start of read will be before start of reference context
|
// check if start of read will be before start of reference context
|
||||||
if (start < ref.getWindow().getStart())// read starts before haplotype: read will have to be cut
|
if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
|
||||||
start = ref.getWindow().getStart();
|
// read starts before haplotype: read will have to be cut
|
||||||
|
//numStartClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
|
||||||
|
startLocationInRefForHaplotypes = ref.getWindow().getStart();
|
||||||
|
}
|
||||||
// check also if end of read will go beyond reference context
|
// check also if end of read will go beyond reference context
|
||||||
if (stop > ref.getWindow().getStop())
|
if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
|
||||||
stop = ref.getWindow().getStop();
|
//numEndClippedBases += stopLocationInRefForHaplotypes - ref.getWindow().getStop();
|
||||||
|
stopLocationInRefForHaplotypes = ref.getWindow().getStop();
|
||||||
|
}
|
||||||
|
|
||||||
// if there's an insertion in the read, the read stop position will be less than start + read length,
|
// if there's an insertion in the read, the read stop position will be less than start + read legnth,
|
||||||
// but we want to compute likelihoods in the whole region that a read might overlap
|
// but we want to compute likelihoods in the whole region that a read might overlap
|
||||||
if (stop <= start + readLength) {
|
if (stopLocationInRefForHaplotypes <= startLocationInRefForHaplotypes + readLength) {
|
||||||
stop = start + readLength-1;
|
stopLocationInRefForHaplotypes = startLocationInRefForHaplotypes + readLength-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ok, we now figured out total number of clipped bases on both ends.
|
// ok, we now figured out total number of clipped bases on both ends.
|
||||||
// Figure out where we want to place the haplotype to score read against
|
// Figure out where we want to place the haplotype to score read against
|
||||||
/*
|
|
||||||
if (DEBUG)
|
if (DEBUG)
|
||||||
System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
|
System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
|
||||||
numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength());
|
numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
|
LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if we'll end up with an empty read once all clipping is done
|
||||||
|
*/
|
||||||
if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
|
if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
|
||||||
int j=0;
|
int j=0;
|
||||||
for (Allele a: haplotypeMap.keySet()) {
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
|
|
@ -537,69 +347,67 @@ public class PairHMMIndelErrorModel {
|
||||||
// initialize path metric and traceback memories for likelihood computation
|
// initialize path metric and traceback memories for likelihood computation
|
||||||
double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null;
|
double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null;
|
||||||
byte[] previousHaplotypeSeen = null;
|
byte[] previousHaplotypeSeen = null;
|
||||||
double[] previousGOP = null;
|
final byte[] contextLogGapOpenProbabilities = new byte[readBases.length];
|
||||||
double[] previousGCP = null;
|
final byte[] contextLogGapContinuationProbabilities = new byte[readBases.length];
|
||||||
int startIdx;
|
|
||||||
|
// get homopolymer length profile for current haplotype
|
||||||
|
int[] hrunProfile = new int[readBases.length];
|
||||||
|
getContextHomopolymerLength(readBases,hrunProfile);
|
||||||
|
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
||||||
|
|
||||||
|
|
||||||
for (Allele a: haplotypeMap.keySet()) {
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
|
|
||||||
|
|
||||||
Haplotype haplotype = haplotypeMap.get(a);
|
Haplotype haplotype = haplotypeMap.get(a);
|
||||||
if (stop > haplotype.getStopPosition())
|
|
||||||
stop = haplotype.getStopPosition();
|
|
||||||
|
|
||||||
if (start < haplotype.getStartPosition())
|
if (stopLocationInRefForHaplotypes > haplotype.getStopPosition())
|
||||||
start = haplotype.getStartPosition();
|
stopLocationInRefForHaplotypes = haplotype.getStopPosition();
|
||||||
|
|
||||||
// cut haplotype bases
|
if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
|
||||||
long indStart = start - haplotype.getStartPosition();
|
startLocationInRefForHaplotypes = haplotype.getStartPosition();
|
||||||
long indStop = stop - haplotype.getStartPosition();
|
|
||||||
|
final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
|
||||||
|
final long indStop = stopLocationInRefForHaplotypes - haplotype.getStartPosition();
|
||||||
|
|
||||||
double readLikelihood;
|
double readLikelihood;
|
||||||
if (DEBUG)
|
if (DEBUG)
|
||||||
System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
|
System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
|
||||||
indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength(), read.getCigar().toString());
|
indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (indStart < 0 || indStop >= haplotype.getBases().length || indStart > indStop) {
|
|
||||||
// read spanned more than allowed reference context: we currently can't deal with this
|
|
||||||
readLikelihood =0;
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||||
(int)indStart, (int)indStop);
|
(int)indStart, (int)indStop);
|
||||||
|
|
||||||
if (matchMetricArray == null) {
|
final int X_METRIC_LENGTH = readBases.length+2;
|
||||||
final int X_METRIC_LENGTH = readBases.length+1;
|
final int Y_METRIC_LENGTH = haplotypeBases.length+2;
|
||||||
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
|
|
||||||
|
|
||||||
|
if (matchMetricArray == null) {
|
||||||
|
//no need to reallocate arrays for each new haplotype, as length won't change
|
||||||
matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
|
|
||||||
|
|
||||||
|
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||||
}
|
}
|
||||||
final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
|
|
||||||
final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
|
int startIndexInHaplotype = 0;
|
||||||
if (previousHaplotypeSeen == null)
|
if (previousHaplotypeSeen != null)
|
||||||
startIdx = 0;
|
startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
|
||||||
else {
|
|
||||||
final int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
|
|
||||||
final int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP);
|
|
||||||
final int s3 = computeFirstDifferingPosition(currentContextGCP, previousGCP);
|
|
||||||
startIdx = Math.min(Math.min(s1, s2), s3);
|
|
||||||
}
|
|
||||||
previousHaplotypeSeen = haplotypeBases.clone();
|
previousHaplotypeSeen = haplotypeBases.clone();
|
||||||
previousGOP = currentContextGOP.clone();
|
|
||||||
previousGCP = currentContextGCP.clone();
|
|
||||||
|
|
||||||
|
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
|
||||||
|
contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities,
|
||||||
|
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
|
||||||
|
|
||||||
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
|
|
||||||
currentContextGOP, currentContextGCP, startIdx, matchMetricArray, XMetricArray, YMetricArray);
|
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println("H:"+new String(haplotypeBases));
|
System.out.println("H:"+new String(haplotypeBases));
|
||||||
System.out.println("R:"+new String(readBases));
|
System.out.println("R:"+new String(readBases));
|
||||||
System.out.format("L:%4.2f\n",readLikelihood);
|
System.out.format("L:%4.2f\n",readLikelihood);
|
||||||
System.out.format("StPos:%d\n", startIdx);
|
System.out.format("StPos:%d\n", startIndexInHaplotype);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
readEl.put(a,readLikelihood);
|
readEl.put(a,readLikelihood);
|
||||||
readLikelihoods[readIdx][j++] = readLikelihood;
|
readLikelihoods[readIdx][j++] = readLikelihood;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,10 +20,8 @@ import java.util.*;
|
||||||
public class AlleleCount extends VariantStratifier {
|
public class AlleleCount extends VariantStratifier {
|
||||||
@Override
|
@Override
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
List<RodBinding<VariantContext>> evals = getVariantEvalWalker().getEvals();
|
|
||||||
|
|
||||||
// we can only work with a single eval VCF, and it must have genotypes
|
// we can only work with a single eval VCF, and it must have genotypes
|
||||||
if ( evals.size() != 1 )
|
if ( getVariantEvalWalker().getEvals().size() != 1 && !getVariantEvalWalker().mergeEvals )
|
||||||
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf");
|
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf");
|
||||||
|
|
||||||
// There are 2 x n sample chromosomes for diploids
|
// There are 2 x n sample chromosomes for diploids
|
||||||
|
|
|
||||||
|
|
@ -241,14 +241,6 @@ public class VariantDataManager {
|
||||||
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
|
|
||||||
// normalize QD by event length for indel case
|
|
||||||
int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now
|
|
||||||
if (eventLength > 0) { // sanity check
|
|
||||||
value /= (double)eventLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
||||||
if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
||||||
} catch( Exception e ) {
|
} catch( Exception e ) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,259 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book.
|
||||||
|
* User: rpoplin
|
||||||
|
* Date: 3/1/12
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class PairHMM {
|
||||||
|
private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE;
|
||||||
|
private static final byte DEFAULT_GOP = (byte) 45;
|
||||||
|
private static final byte DEFAULT_GCP = (byte) 10;
|
||||||
|
private static final double BANDING_TOLERANCE = 22.0;
|
||||||
|
private static final int BANDING_CLUSTER_WINDOW = 12;
|
||||||
|
private final boolean noBanded;
|
||||||
|
|
||||||
|
public PairHMM() {
|
||||||
|
noBanded = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PairHMM( final boolean noBanded ) {
|
||||||
|
this.noBanded = noBanded;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
|
||||||
|
final int X_METRIC_LENGTH) {
|
||||||
|
|
||||||
|
for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
|
||||||
|
Arrays.fill(matchMetricArray[iii], Double.NEGATIVE_INFINITY);
|
||||||
|
Arrays.fill(XMetricArray[iii], Double.NEGATIVE_INFINITY);
|
||||||
|
Arrays.fill(YMetricArray[iii], Double.NEGATIVE_INFINITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the initial condition
|
||||||
|
matchMetricArray[1][1] = 0.0; // Math.log10(1.0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
|
||||||
|
@Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
|
||||||
|
public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
|
||||||
|
final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP ) {
|
||||||
|
|
||||||
|
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||||
|
final int X_METRIC_LENGTH = readBases.length + 2;
|
||||||
|
final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
|
||||||
|
|
||||||
|
// initial arrays to hold the probabilities of being in the match, insertion and deletion cases
|
||||||
|
final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
|
final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
|
final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
|
|
||||||
|
initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||||
|
|
||||||
|
return computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP, 0, matchMetricArray, XMetricArray, YMetricArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires({"readBases.length == readQuals.length","readBases.length == insertionGOP.length","readBases.length == deletionGOP.length","readBases.length == overallGCP.length"})
|
||||||
|
@Ensures({"!Double.isInfinite(result)", "!Double.isNaN(result)"}) // Result should be a proper log10 probability
|
||||||
|
public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
|
||||||
|
final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP, final int hapStartIndex,
|
||||||
|
final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
|
||||||
|
|
||||||
|
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||||
|
final int X_METRIC_LENGTH = readBases.length + 2;
|
||||||
|
final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
|
||||||
|
|
||||||
|
// ensure that all the qual scores have valid values
|
||||||
|
for( int iii = 0; iii < readQuals.length; iii++ ) {
|
||||||
|
readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) );
|
||||||
|
}
|
||||||
|
|
||||||
|
if( false ) {
|
||||||
|
final ArrayList<Integer> workQueue = new ArrayList<Integer>(); // holds a queue of starting work location (indices along the diagonal). Will be sorted each step
|
||||||
|
final ArrayList<Integer> workToBeAdded = new ArrayList<Integer>();
|
||||||
|
final ArrayList<Double> calculatedValues = new ArrayList<Double>();
|
||||||
|
final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH - 1;
|
||||||
|
workQueue.add( 1 ); // Always start a new thread at the baseline because of partially repeating sequences that match better in the latter half of the haplotype
|
||||||
|
|
||||||
|
for(int diag = 3; diag < numDiags; diag++) { // diag = 3 is the (1,2) element of the metric arrays. (1,1) is the initial condition and is purposefully skipped over
|
||||||
|
//Collections.sort(workQueue); // no need to sort because elements are guaranteed to be in ascending order
|
||||||
|
int el = 1;
|
||||||
|
for( int work : workQueue ) {
|
||||||
|
// choose the appropriate diagonal baseline location
|
||||||
|
int iii = 0;
|
||||||
|
int jjj = diag;
|
||||||
|
if( diag > Y_METRIC_LENGTH ) {
|
||||||
|
iii = diag - Y_METRIC_LENGTH;
|
||||||
|
jjj = Y_METRIC_LENGTH;
|
||||||
|
}
|
||||||
|
// move to the starting work location along the diagonal
|
||||||
|
iii += work;
|
||||||
|
jjj -= work;
|
||||||
|
while( iii >= X_METRIC_LENGTH || jjj <= 0 ) {
|
||||||
|
iii--;
|
||||||
|
jjj++;
|
||||||
|
work--;
|
||||||
|
}
|
||||||
|
if( !detectClusteredStartLocations(workToBeAdded, work ) ) {
|
||||||
|
workToBeAdded.add(work); // keep this thread going once it has started
|
||||||
|
}
|
||||||
|
|
||||||
|
if( work >= el - 3 ) {
|
||||||
|
// step along the diagonal in the forward direction, updating the match matrices and looking for a drop off from the maximum observed value
|
||||||
|
double maxElement = Double.NEGATIVE_INFINITY;
|
||||||
|
for( el = work; el < numDiags + 1; el++ ) {
|
||||||
|
updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
|
||||||
|
insertionGOP, deletionGOP, overallGCP, matchMetricArray, XMetricArray, YMetricArray);
|
||||||
|
final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
|
||||||
|
calculatedValues.add(bestMetric);
|
||||||
|
if( bestMetric > maxElement ) {
|
||||||
|
maxElement = bestMetric;
|
||||||
|
} else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if( ++iii >= X_METRIC_LENGTH ) { // don't walk off the edge of the matrix
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if( --jjj <= 0 ) { // don't walk off the edge of the matrix
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// find a local maximum to start a new band in the work queue
|
||||||
|
double localMaxElement = Double.NEGATIVE_INFINITY;
|
||||||
|
int localMaxElementIndex = 0;
|
||||||
|
for(int kkk = calculatedValues.size()-1; kkk >= 1; kkk--) {
|
||||||
|
final double bestMetric = calculatedValues.get(kkk);
|
||||||
|
if( bestMetric > localMaxElement ) {
|
||||||
|
localMaxElement = bestMetric;
|
||||||
|
localMaxElementIndex = kkk;
|
||||||
|
} else if( localMaxElement - bestMetric > BANDING_TOLERANCE * 0.5 ) { // find a local maximum
|
||||||
|
if( !detectClusteredStartLocations(workToBeAdded, work + localMaxElementIndex ) ) {
|
||||||
|
workToBeAdded.add( work + localMaxElementIndex );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
calculatedValues.clear();
|
||||||
|
|
||||||
|
// reset iii and jjj to the appropriate diagonal baseline location
|
||||||
|
iii = 0;
|
||||||
|
jjj = diag;
|
||||||
|
if( diag > Y_METRIC_LENGTH ) {
|
||||||
|
iii = diag - Y_METRIC_LENGTH;
|
||||||
|
jjj = Y_METRIC_LENGTH;
|
||||||
|
}
|
||||||
|
// move to the starting work location along the diagonal
|
||||||
|
iii += work-1;
|
||||||
|
jjj -= work-1;
|
||||||
|
|
||||||
|
// step along the diagonal in the reverse direction, updating the match matrices and looking for a drop off from the maximum observed value
|
||||||
|
for( int traceBack = work - 1; traceBack > 0 && iii > 0 && jjj < Y_METRIC_LENGTH; traceBack--,iii--,jjj++ ) {
|
||||||
|
updateCell(iii, jjj, haplotypeBases, readBases, readQuals,
|
||||||
|
insertionGOP, deletionGOP, overallGCP, matchMetricArray, XMetricArray, YMetricArray);
|
||||||
|
final double bestMetric = MathUtils.max(matchMetricArray[iii][jjj], XMetricArray[iii][jjj], YMetricArray[iii][jjj]);
|
||||||
|
if( bestMetric > maxElement ) {
|
||||||
|
maxElement = bestMetric;
|
||||||
|
} else if( maxElement - bestMetric > BANDING_TOLERANCE ) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
workQueue.clear();
|
||||||
|
workQueue.addAll(workToBeAdded);
|
||||||
|
workToBeAdded.clear();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// simple rectangular version of update loop, slow
|
||||||
|
for( int iii = 1; iii < X_METRIC_LENGTH; iii++ ) {
|
||||||
|
for( int jjj = hapStartIndex + 1; jjj < Y_METRIC_LENGTH; jjj++ ) {
|
||||||
|
if( (iii == 1 && jjj == 1) ) { continue; }
|
||||||
|
updateCell(iii, jjj, haplotypeBases, readBases, readQuals, insertionGOP, deletionGOP, overallGCP,
|
||||||
|
matchMetricArray, XMetricArray, YMetricArray);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// final probability is the log10 sum of the last element in all three state arrays
|
||||||
|
final int endI = X_METRIC_LENGTH - 1;
|
||||||
|
final int endJ = Y_METRIC_LENGTH - 1;
|
||||||
|
return MathUtils.approximateLog10SumLog10(matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
|
||||||
|
final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
|
||||||
|
final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
|
||||||
|
|
||||||
|
// the read and haplotype indices are offset by one because the state arrays have an extra column to hold the initial conditions
|
||||||
|
final int im1 = indI - 1;
|
||||||
|
final int jm1 = indJ - 1;
|
||||||
|
|
||||||
|
// update the match array
|
||||||
|
double pBaseReadLog10 = 0.0; // Math.log10(1.0);
|
||||||
|
if( im1 > 0 && jm1 > 0 ) { // the emission probability is applied when leaving the state
|
||||||
|
final byte x = readBases[im1-1];
|
||||||
|
final byte y = haplotypeBases[jm1-1];
|
||||||
|
final byte qual = readQuals[im1-1];
|
||||||
|
pBaseReadLog10 = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
|
||||||
|
}
|
||||||
|
final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
|
||||||
|
final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
|
||||||
|
final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
|
||||||
|
matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0);
|
||||||
|
|
||||||
|
// update the X (insertion) array
|
||||||
|
final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
|
||||||
|
final double e1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GCP) : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
|
||||||
|
final double qBaseReadLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
|
||||||
|
XMetricArray[indI][indJ] = qBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ] + d1, XMetricArray[indI-1][indJ] + e1);
|
||||||
|
|
||||||
|
// update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
|
||||||
|
final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
|
||||||
|
final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
|
||||||
|
final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
|
||||||
|
YMetricArray[indI][indJ] = qBaseRefLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI][indJ-1] + d2, YMetricArray[indI][indJ-1] + e2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// private function used by the banded approach to ensure the proposed bands are sufficiently distinct from each other
|
||||||
|
private boolean detectClusteredStartLocations( final ArrayList<Integer> list, int loc ) {
|
||||||
|
for(int x : list) {
|
||||||
|
if( Math.abs(x-loc) <= BANDING_CLUSTER_WINDOW ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -9,6 +9,7 @@ import net.sf.samtools.SAMUtils;
|
||||||
* @author Kiran Garimella
|
* @author Kiran Garimella
|
||||||
*/
|
*/
|
||||||
public class QualityUtils {
|
public class QualityUtils {
|
||||||
|
public final static byte MAX_RECALIBRATED_Q_SCORE = 50;
|
||||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||||
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -68,9 +68,9 @@ public class BaseRecalibration {
|
||||||
/**
|
/**
|
||||||
* This constructor only exists for testing purposes.
|
* This constructor only exists for testing purposes.
|
||||||
*
|
*
|
||||||
* @param quantizationInfo
|
* @param quantizationInfo the quantization info object
|
||||||
* @param keysAndTablesMap
|
* @param keysAndTablesMap the map of key managers and recalibration tables
|
||||||
* @param requestedCovariates
|
* @param requestedCovariates the list of requested covariates
|
||||||
*/
|
*/
|
||||||
protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, ArrayList<Covariate> requestedCovariates) {
|
protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, ArrayList<Covariate> requestedCovariates) {
|
||||||
this.quantizationInfo = quantizationInfo;
|
this.quantizationInfo = quantizationInfo;
|
||||||
|
|
@ -179,9 +179,8 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
|
|
||||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||||
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||||
|
|
||||||
|
|
||||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,8 @@ public class ContextCovariateUnitTest {
|
||||||
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
|
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
String expectedContext = null;
|
String expectedContext = null;
|
||||||
if (i >= contextSize) {
|
if (i - contextSize + 1 >= 0) {
|
||||||
String context = bases.substring(i - contextSize, i);
|
String context = bases.substring(i - contextSize + 1, i + 1);
|
||||||
if (!context.contains("N"))
|
if (!context.contains("N"))
|
||||||
expectedContext = context;
|
expectedContext = context;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSamplePilot1() {
|
public void testMultiSamplePilot1() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||||
Arrays.asList("d3191b2f10139c969501990ffdf29082"));
|
Arrays.asList("9b08dc6800ba11bc6d9f6ccf392a60fe"));
|
||||||
executeTest("test MultiSample Pilot1", spec);
|
executeTest("test MultiSample Pilot1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testSingleSamplePilot2() {
|
public void testSingleSamplePilot2() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("7c7288170c6aadae555a44e79ca5bf19"));
|
Arrays.asList("d275e0f75368dbff012ea8655dce3444"));
|
||||||
executeTest("test SingleSample Pilot2", spec);
|
executeTest("test SingleSample Pilot2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultipleSNPAlleles() {
|
public void testMultipleSNPAlleles() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
||||||
Arrays.asList("c956f0ea0e5f002288a09f4bc4af1319"));
|
Arrays.asList("e948543b83bfd0640fcb994d72f8e234"));
|
||||||
executeTest("test Multiple SNP alleles", spec);
|
executeTest("test Multiple SNP alleles", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -80,7 +80,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private final static String COMPRESSED_OUTPUT_MD5 = "2158eb918abb95225ea5372fcd9c9236";
|
private final static String COMPRESSED_OUTPUT_MD5 = "1e3c897794e5763a8720807686707b18";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCompressedOutput() {
|
public void testCompressedOutput() {
|
||||||
|
|
@ -101,7 +101,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||||
|
|
||||||
String md5 = "834e85f6af4ad4a143b913dfc7defb08";
|
String md5 = "06d11ed89f02f08911e100df0f7db7a4";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||||
|
|
@ -200,8 +200,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testHeterozyosity() {
|
public void testHeterozyosity() {
|
||||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||||
e.put( 0.01, "d5879f1c277035060434d79a441b31ca" );
|
e.put( 0.01, "d07e5ca757fbcb1c03f652f82265c2f8" );
|
||||||
e.put( 1.0 / 1850, "13f80245bab2321b92d27eebd5c2fc33" );
|
e.put( 1.0 / 1850, "d1fb9186e6f39f2bcf5d0edacd8f7fe2" );
|
||||||
|
|
||||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -225,7 +225,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,100,000",
|
" -L 1:10,000,000-10,100,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("8c134a6e0abcc70d2ed3216d5f8e0100"));
|
Arrays.asList("623be1fd8b63a01bfe35ac864d5199fe"));
|
||||||
|
|
||||||
executeTest(String.format("test multiple technologies"), spec);
|
executeTest(String.format("test multiple technologies"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -L 1:10,000,000-10,100,000" +
|
" -L 1:10,000,000-10,100,000" +
|
||||||
" -baq CALCULATE_AS_NECESSARY",
|
" -baq CALCULATE_AS_NECESSARY",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("34baad3177712f6cd0b476f4c578e08f"));
|
Arrays.asList("40ea10c0238c3be2991d31ae72476884"));
|
||||||
|
|
||||||
executeTest(String.format("test calling with BAQ"), spec);
|
executeTest(String.format("test calling with BAQ"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,500,000",
|
" -L 1:10,000,000-10,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("4bf4f819a39a73707cae60fe30478742"));
|
Arrays.asList("c9b0bd900a4ec949adfbd28909581eeb"));
|
||||||
|
|
||||||
executeTest(String.format("test indel caller in SLX"), spec);
|
executeTest(String.format("test indel caller in SLX"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -minIndelCnt 1" +
|
" -minIndelCnt 1" +
|
||||||
" -L 1:10,000,000-10,100,000",
|
" -L 1:10,000,000-10,100,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("ae08fbd6b0618cf3ac1be763ed7b41ca"));
|
Arrays.asList("6b7c8691c527facf9884c2517d943f2f"));
|
||||||
|
|
||||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,500,000",
|
" -L 1:10,000,000-10,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("120600f2bfa3a47bd93b50f768f98d5b"));
|
Arrays.asList("d72603aa33a086d64d4dddfd2995552f"));
|
||||||
|
|
||||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -301,7 +301,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("2e75d2766235eab23091a67ea2947d13"));
|
Arrays.asList("4a59fe207949b7d043481d7c1b786573"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -311,7 +311,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("5057bd7d07111e8b1085064782eb6c80"));
|
Arrays.asList("a8a9ccf30bddee94bb1d300600794ee7"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -319,13 +319,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSampleIndels() {
|
public void testMultiSampleIndels() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||||
Arrays.asList("c0f9ca3ceab90ebd38cc0eec9441d71f"));
|
Arrays.asList("0b388936022539530f565da14d5496d3"));
|
||||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||||
Arrays.asList("0240f34e71f137518be233c9890a5349"));
|
Arrays.asList("537dd9b4174dc356fb13d8d3098ad602"));
|
||||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -368,7 +368,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMinIndelFraction0() {
|
public void testMinIndelFraction0() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||||
Arrays.asList("53758e66e3a3188bd9c78d2329d41962"));
|
Arrays.asList("973178b97efd2daacc9e45c414275d59"));
|
||||||
executeTest("test minIndelFraction 0.0", spec);
|
executeTest("test minIndelFraction 0.0", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -376,7 +376,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMinIndelFraction25() {
|
public void testMinIndelFraction25() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||||
Arrays.asList("3aa39b1f6f3b1eb051765f9c21f6f461"));
|
Arrays.asList("220facd2eb0923515d1d8ab874055564"));
|
||||||
executeTest("test minIndelFraction 0.25", spec);
|
executeTest("test minIndelFraction 0.25", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
|
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
|
||||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
|
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||||
|
private static String fundamentalTestSNPsSplit1of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_1_of_2.vcf";
|
||||||
|
private static String fundamentalTestSNPsSplit2of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_2_of_2.vcf";
|
||||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
|
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
|
||||||
|
|
||||||
private static String cmdRoot = "-T VariantEval" +
|
private static String cmdRoot = "-T VariantEval" +
|
||||||
|
|
@ -437,24 +439,69 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testAlleleCountStrat() {
|
public void testAlleleCountStrat() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
buildCommandLine(
|
buildCommandLine(
|
||||||
"-T VariantEval",
|
"-T VariantEval",
|
||||||
"-R " + b37KGReference,
|
"-R " + b37KGReference,
|
||||||
"--dbsnp " + b37dbSNP132,
|
"--dbsnp " + b37dbSNP132,
|
||||||
"--eval " + fundamentalTestSNPsVCF,
|
"--eval " + fundamentalTestSNPsVCF,
|
||||||
"-noEV",
|
"-noEV",
|
||||||
"-EV CountVariants",
|
"-EV CountVariants",
|
||||||
"-noST",
|
"-noST",
|
||||||
"-ST AlleleCount",
|
"-ST AlleleCount",
|
||||||
"-L " + fundamentalTestSNPsVCF,
|
"-L " + fundamentalTestSNPsVCF,
|
||||||
"-o %s"
|
"-o %s"
|
||||||
),
|
),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("1198bfea6183bd43219071a84c79a386")
|
Arrays.asList("1198bfea6183bd43219071a84c79a386")
|
||||||
);
|
);
|
||||||
executeTest("testAlleleCountStrat", spec);
|
executeTest("testAlleleCountStrat", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleEvalTracksAlleleCountWithMerge() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T VariantEval",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"--dbsnp " + b37dbSNP132,
|
||||||
|
"--eval " + fundamentalTestSNPsSplit1of2VCF,
|
||||||
|
"--eval " + fundamentalTestSNPsSplit2of2VCF,
|
||||||
|
"--mergeEvals",
|
||||||
|
"-noEV",
|
||||||
|
"-EV CountVariants",
|
||||||
|
"-noST",
|
||||||
|
"-ST AlleleCount",
|
||||||
|
"-L " + fundamentalTestSNPsVCF,
|
||||||
|
"-o %s"
|
||||||
|
),
|
||||||
|
1,
|
||||||
|
Arrays.asList("1198bfea6183bd43219071a84c79a386")
|
||||||
|
);
|
||||||
|
executeTest("testMultipleEvalTracksAlleleCountWithMerge", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleEvalTracksAlleleCountWithoutMerge() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T VariantEval",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"--dbsnp " + b37dbSNP132,
|
||||||
|
"--eval " + fundamentalTestSNPsSplit1of2VCF,
|
||||||
|
"--eval " + fundamentalTestSNPsSplit2of2VCF,
|
||||||
|
//"--mergeEvals", No merge with AC strat ==> error
|
||||||
|
"-noEV",
|
||||||
|
"-EV CountVariants",
|
||||||
|
"-noST",
|
||||||
|
"-ST AlleleCount",
|
||||||
|
"-L " + fundamentalTestSNPsVCF
|
||||||
|
),
|
||||||
|
0,
|
||||||
|
UserException.class
|
||||||
|
);
|
||||||
|
executeTest("testMultipleEvalTracksAlleleCountWithoutMerge", spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIntervalStrat() {
|
public void testIntervalStrat() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
|
|
||||||
|
|
@ -73,9 +73,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||||
"6d7ee4cb651c8b666e4a4523363caaff", // tranches
|
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
|
||||||
"ee5b408c8434a594496118875690c438", // recal file
|
"cf380d9b0ae04c8918be8425f82035b4", // recal file
|
||||||
"5d7e07d8813db96ba3f3dfe4737f83d1"); // cut VCF
|
"b00e5e5a6807df8ed1682317948e8a6d"); // cut VCF
|
||||||
|
|
||||||
@DataProvider(name = "VRIndelTest")
|
@DataProvider(name = "VRIndelTest")
|
||||||
public Object[][] createData2() {
|
public Object[][] createData2() {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,367 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
public class PairHMMUnitTest extends BaseTest {
|
||||||
|
final static boolean EXTENSIVE_TESTING = true;
|
||||||
|
PairHMM hmm = new PairHMM( false ); // reference implementation
|
||||||
|
PairHMM bandedHMM = new PairHMM( true ); // algorithm with banding
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Provider
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class BasicLikelihoodTestProvider extends TestDataProvider {
|
||||||
|
final String ref, read;
|
||||||
|
final byte[] refBasesWithContext, readBasesWithContext;
|
||||||
|
final int baseQual, insQual, delQual, gcp;
|
||||||
|
final int expectedQual;
|
||||||
|
final static String CONTEXT = "ACGTAATGACGATTGCA";
|
||||||
|
final static String LEFT_FLANK = "GATTTATCATCGAGTCTGC";
|
||||||
|
final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTTA";
|
||||||
|
|
||||||
|
public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
|
||||||
|
this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BasicLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
|
||||||
|
super(BasicLikelihoodTestProvider.class, String.format("ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
|
||||||
|
this.baseQual = baseQual;
|
||||||
|
this.delQual = delQual;
|
||||||
|
this.insQual = insQual;
|
||||||
|
this.gcp = gcp;
|
||||||
|
this.read = read;
|
||||||
|
this.ref = ref;
|
||||||
|
this.expectedQual = expectedQual;
|
||||||
|
|
||||||
|
refBasesWithContext = asBytes(ref, left, right);
|
||||||
|
readBasesWithContext = asBytes(read, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public double expectedLogL() {
|
||||||
|
return expectedQual / -10.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double tolerance() {
|
||||||
|
return 0.1; // TODO FIXME arbitrary
|
||||||
|
}
|
||||||
|
|
||||||
|
public double calcLogL() {
|
||||||
|
|
||||||
|
double logL = hmm.computeReadLikelihoodGivenHaplotype(
|
||||||
|
refBasesWithContext, readBasesWithContext,
|
||||||
|
qualAsBytes(baseQual, false), qualAsBytes(insQual, true), qualAsBytes(delQual, true),
|
||||||
|
qualAsBytes(gcp, false));
|
||||||
|
|
||||||
|
return logL;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
|
||||||
|
return ( (left ? LEFT_FLANK : "") + CONTEXT + bases + CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] qualAsBytes(final int phredQual, final boolean doGOP) {
|
||||||
|
final byte phredQuals[] = new byte[readBasesWithContext.length];
|
||||||
|
// initialize everything to MASSIVE_QUAL so it cannot be moved by HMM
|
||||||
|
Arrays.fill(phredQuals, (byte)100);
|
||||||
|
|
||||||
|
// update just the bases corresponding to the provided micro read with the quality scores
|
||||||
|
if( doGOP ) {
|
||||||
|
phredQuals[0 + CONTEXT.length()] = (byte)phredQual;
|
||||||
|
} else {
|
||||||
|
for ( int i = 0; i < read.length(); i++)
|
||||||
|
phredQuals[i + CONTEXT.length()] = (byte)phredQual;
|
||||||
|
}
|
||||||
|
|
||||||
|
return phredQuals;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final Random random = new Random(87865573);
|
||||||
|
private class BandedLikelihoodTestProvider extends TestDataProvider {
|
||||||
|
final String ref, read;
|
||||||
|
final byte[] refBasesWithContext, readBasesWithContext;
|
||||||
|
final int baseQual, insQual, delQual, gcp;
|
||||||
|
final int expectedQual;
|
||||||
|
final static String LEFT_CONTEXT = "ACGTAATGACGCTACATGTCGCCAACCGTC";
|
||||||
|
final static String RIGHT_CONTEXT = "TACGGCTTCATATAGGGCAATGTGTGTGGCAAAA";
|
||||||
|
final static String LEFT_FLANK = "GATTTATCATCGAGTCTGTT";
|
||||||
|
final static String RIGHT_FLANK = "CATGGATCGTTATCAGCTATCTCGAGGGATTCACTTAACAGTTTCCGTA";
|
||||||
|
final byte[] baseQuals, insQuals, delQuals, gcps;
|
||||||
|
|
||||||
|
public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp) {
|
||||||
|
this(ref, read, baseQual, insQual, delQual, expectedQual, gcp, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BandedLikelihoodTestProvider(final String ref, final String read, final int baseQual, final int insQual, final int delQual, final int expectedQual, final int gcp, final boolean left, final boolean right) {
|
||||||
|
super(BandedLikelihoodTestProvider.class, String.format("BANDED: ref=%s read=%s b/i/d/c quals = %d/%d/%d/%d l/r flank = %b/%b e[qual]=%d", ref, read, baseQual, insQual, delQual, gcp, left, right, expectedQual));
|
||||||
|
this.baseQual = baseQual;
|
||||||
|
this.delQual = delQual;
|
||||||
|
this.insQual = insQual;
|
||||||
|
this.gcp = gcp;
|
||||||
|
this.read = read;
|
||||||
|
this.ref = ref;
|
||||||
|
this.expectedQual = expectedQual;
|
||||||
|
|
||||||
|
refBasesWithContext = asBytes(ref, left, right);
|
||||||
|
readBasesWithContext = asBytes(read, false, false);
|
||||||
|
baseQuals = qualAsBytes(baseQual);
|
||||||
|
insQuals = qualAsBytes(insQual);
|
||||||
|
delQuals = qualAsBytes(delQual);
|
||||||
|
gcps = qualAsBytes(gcp, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public double expectedLogL() {
|
||||||
|
double logL = hmm.computeReadLikelihoodGivenHaplotype(
|
||||||
|
refBasesWithContext, readBasesWithContext,
|
||||||
|
baseQuals, insQuals, delQuals, gcps);
|
||||||
|
|
||||||
|
return logL;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double tolerance() {
|
||||||
|
return 0.2; // TODO FIXME arbitrary
|
||||||
|
}
|
||||||
|
|
||||||
|
public double calcLogL() {
|
||||||
|
|
||||||
|
double logL = bandedHMM.computeReadLikelihoodGivenHaplotype(
|
||||||
|
refBasesWithContext, readBasesWithContext,
|
||||||
|
baseQuals, insQuals, delQuals, gcps);
|
||||||
|
|
||||||
|
return logL;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final byte[] asBytes(final String bases, final boolean left, final boolean right) {
|
||||||
|
return ( (left ? LEFT_FLANK : "") + LEFT_CONTEXT + bases + RIGHT_CONTEXT + (right ? RIGHT_FLANK : "")).getBytes();
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] qualAsBytes(final int phredQual) {
|
||||||
|
return qualAsBytes(phredQual, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] qualAsBytes(final int phredQual, final boolean addRandom) {
|
||||||
|
final byte phredQuals[] = new byte[readBasesWithContext.length];
|
||||||
|
Arrays.fill(phredQuals, (byte)phredQual);
|
||||||
|
if(addRandom) {
|
||||||
|
for( int iii = 0; iii < phredQuals.length; iii++) {
|
||||||
|
phredQuals[iii] = (byte) ((int) phredQuals[iii] + (random.nextInt(7) - 3));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return phredQuals;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "BasicLikelihoodTestProvider")
|
||||||
|
public Object[][] makeBasicLikelihoodTests() {
|
||||||
|
// context on either side is ACGTTGCA REF ACGTTGCA
|
||||||
|
// test all combinations
|
||||||
|
final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30, 40, 50) : Arrays.asList(30);
|
||||||
|
final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(20, 30, 40, 50) : Arrays.asList(40);
|
||||||
|
final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10);
|
||||||
|
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
|
||||||
|
|
||||||
|
for ( final int baseQual : baseQuals ) {
|
||||||
|
for ( final int indelQual : indelQuals ) {
|
||||||
|
for ( final int gcp : gcps ) {
|
||||||
|
|
||||||
|
// test substitutions
|
||||||
|
for ( final byte refBase : BaseUtils.BASES ) {
|
||||||
|
for ( final byte readBase : BaseUtils.BASES ) {
|
||||||
|
final String ref = new String(new byte[]{refBase});
|
||||||
|
final String read = new String(new byte[]{readBase});
|
||||||
|
final int expected = refBase == readBase ? 0 : baseQual;
|
||||||
|
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test insertions and deletions
|
||||||
|
for ( final int size : sizes ) {
|
||||||
|
for ( final byte base : BaseUtils.BASES ) {
|
||||||
|
final int expected = indelQual + (size - 2) * gcp;
|
||||||
|
|
||||||
|
for ( boolean insertionP : Arrays.asList(true, false)) {
|
||||||
|
final String small = Utils.dupString((char)base, 1);
|
||||||
|
final String big = Utils.dupString((char)base, size);
|
||||||
|
|
||||||
|
final String ref = insertionP ? small : big;
|
||||||
|
final String read = insertionP ? big : small;
|
||||||
|
|
||||||
|
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
|
||||||
|
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
|
||||||
|
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
|
||||||
|
new BasicLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
|
||||||
|
public void testBasicLikelihoods(BasicLikelihoodTestProvider cfg) {
|
||||||
|
double calculatedLogL = cfg.calcLogL();
|
||||||
|
double expectedLogL = cfg.expectedLogL();
|
||||||
|
logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
|
||||||
|
Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "BandedLikelihoodTestProvider")
|
||||||
|
public Object[][] makeBandedLikelihoodTests() {
|
||||||
|
// context on either side is ACGTTGCA REF ACGTTGCA
|
||||||
|
// test all combinations
|
||||||
|
final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(25, 30, 40, 50) : Arrays.asList(30);
|
||||||
|
final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(30, 40, 50) : Arrays.asList(40);
|
||||||
|
final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 12) : Arrays.asList(10);
|
||||||
|
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
|
||||||
|
|
||||||
|
for ( final int baseQual : baseQuals ) {
|
||||||
|
for ( final int indelQual : indelQuals ) {
|
||||||
|
for ( final int gcp : gcps ) {
|
||||||
|
|
||||||
|
// test substitutions
|
||||||
|
for ( final byte refBase : BaseUtils.BASES ) {
|
||||||
|
for ( final byte readBase : BaseUtils.BASES ) {
|
||||||
|
final String ref = new String(new byte[]{refBase});
|
||||||
|
final String read = new String(new byte[]{readBase});
|
||||||
|
final int expected = refBase == readBase ? 0 : baseQual;
|
||||||
|
new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test insertions and deletions
|
||||||
|
for ( final int size : sizes ) {
|
||||||
|
for ( final byte base : BaseUtils.BASES ) {
|
||||||
|
final int expected = indelQual + (size - 2) * gcp;
|
||||||
|
|
||||||
|
for ( boolean insertionP : Arrays.asList(true, false)) {
|
||||||
|
final String small = Utils.dupString((char)base, 1);
|
||||||
|
final String big = Utils.dupString((char)base, size);
|
||||||
|
|
||||||
|
final String ref = insertionP ? small : big;
|
||||||
|
final String read = insertionP ? big : small;
|
||||||
|
|
||||||
|
new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp);
|
||||||
|
new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, false);
|
||||||
|
new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, false, true);
|
||||||
|
new BandedLikelihoodTestProvider(ref, read, baseQual, indelQual, indelQual, expected, gcp, true, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return BandedLikelihoodTestProvider.getTests(BandedLikelihoodTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "BandedLikelihoodTestProvider", enabled = true)
|
||||||
|
public void testBandedLikelihoods(BandedLikelihoodTestProvider cfg) {
|
||||||
|
double calculatedLogL = cfg.calcLogL();
|
||||||
|
double expectedLogL = cfg.expectedLogL();
|
||||||
|
logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
|
||||||
|
Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMismatchInEveryPositionInTheReadWithCenteredHaplotype() {
|
||||||
|
byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
|
||||||
|
|
||||||
|
final int offset = 2;
|
||||||
|
byte[] gop = new byte[haplotype1.length - 2 * offset];
|
||||||
|
Arrays.fill(gop, (byte) 80);
|
||||||
|
byte[] gcp = new byte[haplotype1.length - 2 * offset];
|
||||||
|
Arrays.fill(gcp, (byte) 80);
|
||||||
|
|
||||||
|
for( int k = 0; k < haplotype1.length - 2 * offset; k++ ) {
|
||||||
|
byte[] quals = new byte[haplotype1.length - 2 * offset];
|
||||||
|
Arrays.fill(quals, (byte) 90);
|
||||||
|
// one read mismatches the haplotype
|
||||||
|
quals[k] = 20;
|
||||||
|
|
||||||
|
byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length-offset);
|
||||||
|
// change single base at position k to C. If it's a C, change to T
|
||||||
|
mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
|
||||||
|
double res1 = hmm.computeReadLikelihoodGivenHaplotype(
|
||||||
|
haplotype1, mread,
|
||||||
|
quals, gop, gop,
|
||||||
|
gcp);
|
||||||
|
|
||||||
|
|
||||||
|
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||||
|
|
||||||
|
Assert.assertEquals(res1, -2.0, 1e-2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMismatchInEveryPositionInTheRead() {
|
||||||
|
byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
|
||||||
|
|
||||||
|
final int offset = 2;
|
||||||
|
byte[] gop = new byte[haplotype1.length - offset];
|
||||||
|
Arrays.fill(gop, (byte) 80);
|
||||||
|
byte[] gcp = new byte[haplotype1.length - offset];
|
||||||
|
Arrays.fill(gcp, (byte) 80);
|
||||||
|
|
||||||
|
for( int k = 0; k < haplotype1.length - offset; k++ ) {
|
||||||
|
byte[] quals = new byte[haplotype1.length - offset];
|
||||||
|
Arrays.fill(quals, (byte) 90);
|
||||||
|
// one read mismatches the haplotype
|
||||||
|
quals[k] = 20;
|
||||||
|
|
||||||
|
byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length);
|
||||||
|
// change single base at position k to C. If it's a C, change to T
|
||||||
|
mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
|
||||||
|
double res1 = hmm.computeReadLikelihoodGivenHaplotype(
|
||||||
|
haplotype1, mread,
|
||||||
|
quals, gop, gop,
|
||||||
|
gcp);
|
||||||
|
|
||||||
|
|
||||||
|
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||||
|
|
||||||
|
Assert.assertEquals(res1, -2.0, 1e-2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -24,10 +24,6 @@ public class BaseRecalibrationUnitTest {
|
||||||
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
|
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
|
||||||
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap;
|
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap;
|
||||||
|
|
||||||
private BQSRKeyManager rgKeyManager;
|
|
||||||
private BQSRKeyManager qsKeyManager;
|
|
||||||
private BQSRKeyManager cvKeyManager;
|
|
||||||
|
|
||||||
private ReadGroupCovariate rgCovariate;
|
private ReadGroupCovariate rgCovariate;
|
||||||
private QualityScoreCovariate qsCovariate;
|
private QualityScoreCovariate qsCovariate;
|
||||||
private ContextCovariate cxCovariate;
|
private ContextCovariate cxCovariate;
|
||||||
|
|
@ -60,13 +56,13 @@ public class BaseRecalibrationUnitTest {
|
||||||
rgCovariate = new ReadGroupCovariate();
|
rgCovariate = new ReadGroupCovariate();
|
||||||
rgCovariate.initialize(RAC);
|
rgCovariate.initialize(RAC);
|
||||||
requiredCovariates.add(rgCovariate);
|
requiredCovariates.add(rgCovariate);
|
||||||
rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||||
keysAndTablesMap.put(rgKeyManager, new HashMap<BitSet, RecalDatum>());
|
keysAndTablesMap.put(rgKeyManager, new HashMap<BitSet, RecalDatum>());
|
||||||
|
|
||||||
qsCovariate = new QualityScoreCovariate();
|
qsCovariate = new QualityScoreCovariate();
|
||||||
qsCovariate.initialize(RAC);
|
qsCovariate.initialize(RAC);
|
||||||
requiredCovariates.add(qsCovariate);
|
requiredCovariates.add(qsCovariate);
|
||||||
qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||||
keysAndTablesMap.put(qsKeyManager, new HashMap<BitSet, RecalDatum>());
|
keysAndTablesMap.put(qsKeyManager, new HashMap<BitSet, RecalDatum>());
|
||||||
|
|
||||||
cxCovariate = new ContextCovariate();
|
cxCovariate = new ContextCovariate();
|
||||||
|
|
@ -75,7 +71,7 @@ public class BaseRecalibrationUnitTest {
|
||||||
cyCovariate = new CycleCovariate();
|
cyCovariate = new CycleCovariate();
|
||||||
cyCovariate.initialize(RAC);
|
cyCovariate.initialize(RAC);
|
||||||
optionalCovariates.add(cyCovariate);
|
optionalCovariates.add(cyCovariate);
|
||||||
cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||||
keysAndTablesMap.put(cvKeyManager, new HashMap<BitSet, RecalDatum>());
|
keysAndTablesMap.put(cvKeyManager, new HashMap<BitSet, RecalDatum>());
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -108,7 +104,7 @@ public class BaseRecalibrationUnitTest {
|
||||||
updateCovariateWithKeySet(mapEntry.getValue(), key, newDatum);
|
updateCovariateWithKeySet(mapEntry.getValue(), key, newDatum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_QUAL_SCORE);
|
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
|
|
||||||
List<Byte> quantizedQuals = new ArrayList<Byte>();
|
List<Byte> quantizedQuals = new ArrayList<Byte>();
|
||||||
List<Long> qualCounts = new ArrayList<Long>();
|
List<Long> qualCounts = new ArrayList<Long>();
|
||||||
|
|
@ -179,7 +175,7 @@ public class BaseRecalibrationUnitTest {
|
||||||
BitSet key = entry.getKey();
|
BitSet key = entry.getKey();
|
||||||
RecalDatum datum = entry.getValue();
|
RecalDatum datum = entry.getValue();
|
||||||
List<Object> keySet = keyManager.keySetFrom(key);
|
List<Object> keySet = keyManager.keySetFrom(key);
|
||||||
System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum));
|
System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum) + "," + datum.getEstimatedQReported());
|
||||||
}
|
}
|
||||||
System.out.println();
|
System.out.println();
|
||||||
}
|
}
|
||||||
|
|
@ -187,9 +183,9 @@ public class BaseRecalibrationUnitTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printNestedHashMap(Map<Object,Object> table, String output) {
|
private static void printNestedHashMap(Map table, String output) {
|
||||||
for (Object key : table.keySet()) {
|
for (Object key : table.keySet()) {
|
||||||
String ret = "";
|
String ret;
|
||||||
if (output.isEmpty())
|
if (output.isEmpty())
|
||||||
ret = "" + key;
|
ret = "" + key;
|
||||||
else
|
else
|
||||||
|
|
@ -199,7 +195,7 @@ public class BaseRecalibrationUnitTest {
|
||||||
if (next instanceof org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum)
|
if (next instanceof org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum)
|
||||||
System.out.println(ret + " => " + next);
|
System.out.println(ret + " => " + next);
|
||||||
else
|
else
|
||||||
printNestedHashMap((Map<Object, Object>) next, "" + ret);
|
printNestedHashMap((Map) next, "" + ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -269,7 +265,7 @@ public class BaseRecalibrationUnitTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||||
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE);
|
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
|
|
||||||
// Verbose printouts used to validate with old recalibrator
|
// Verbose printouts used to validate with old recalibrator
|
||||||
//if(key.contains(null)) {
|
//if(key.contains(null)) {
|
||||||
|
|
@ -289,6 +285,6 @@ public class BaseRecalibrationUnitTest {
|
||||||
final double doubleMismatches = (double) (errors + smoothing);
|
final double doubleMismatches = (double) (errors + smoothing);
|
||||||
final double doubleObservations = (double) ( observations + smoothing );
|
final double doubleObservations = (double) ( observations + smoothing );
|
||||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||||
return Math.min(QualityUtils.MAX_QUAL_SCORE, empiricalQual);
|
return Math.min(QualityUtils.MAX_RECALIBRATED_Q_SCORE, empiricalQual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue