Final version of BAQ calculation. default gap open is 1e-4, a good sensitive value. Useful timer class SimpleTimer added. BAQ is now live.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4818 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-12-10 19:35:12 +00:00
parent 491a599b59
commit 5b46a900b3
12 changed files with 284 additions and 128 deletions

View File

@ -548,7 +548,7 @@ public abstract class AbstractGenomeAnalysisEngine {
private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
DownsamplingMethod method = getDownsamplingMethod();
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.NONE )
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
return new SAMDataSource(
@ -562,7 +562,7 @@ public abstract class AbstractGenomeAnalysisEngine {
filters,
includeReadsWithDeletionAtLoci(),
generateExtendedEvents(),
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.NONE,
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
getWalkerBAQQualityMode(),
refReader);
}

View File

@ -38,7 +38,7 @@ public class ReadProperties {
private boolean includeReadsWithDeletionAtLoci = false;
private boolean useOriginalBaseQualities = false;
private boolean generateExtendedEvents = false;
private BAQ.CalculationMode cmode = BAQ.CalculationMode.NONE;
private BAQ.CalculationMode cmode = BAQ.CalculationMode.OFF;
private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY;
IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
@ -153,12 +152,11 @@ public class GATKArgumentCollection {
@Element(required = false)
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.NONE;
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF;
@Element(required = false)
@Hidden
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="Gap open penalty. For testing purposes only", required = false)
public double BAQGOP = 1e-3; // todo remove me
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty. Default value is 1e-4. 1e-3 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP;
/**
* Gets the default downsampling method, returned if the user didn't specify any downsampling
@ -348,8 +346,9 @@ public class GATKArgumentCollection {
}
if (BTIMergeRule != other.BTIMergeRule)
return false;
if ( BAQMode != other.BAQMode)
return false;
if ( BAQMode != other.BAQMode) return false;
if ( BAQGOP != other.BAQGOP ) return false;
return true;
}

View File

@ -168,7 +168,7 @@ public class SAMDataSource implements SimpleDataSource {
supplementalFilters,
includeReadsWithDeletionAtLoci,
generateExtendedEvents,
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
);
}
@ -592,7 +592,7 @@ public class SAMDataSource implements SimpleDataSource {
if (!noValidationOfReadOrder && enableVerification)
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
if (cmode != BAQ.CalculationMode.NONE)
if (cmode != BAQ.CalculationMode.OFF)
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));

View File

@ -242,7 +242,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
* @{inheritDoc}
*/
public void addAlignment( SAMRecord alignment ) {
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.NONE && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.OFF && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
//System.out.printf("Writing BAQ at OUTPUT TIME%n");
baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode());
}

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -53,6 +54,7 @@ import java.io.PrintStream;
@Reference(window=@Window(start=-1,stop=50))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.Event> {
@Output
protected PrintStream out;

View File

@ -28,18 +28,16 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.util.ParsingUtils;
import org.broad.tribble.vcf.VCFConstants;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.SimpleTimer;
import java.io.PrintStream;
import java.io.File;
@ -60,6 +58,8 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
int MAX_RECORDS = -1;
SimpleTimer timer = new SimpleTimer("myTimer");
public void initialize() {
File rodFile = getRodFile();
@ -75,13 +75,13 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
out.printf("full.decode\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE));
}
startTimer(); // start up timer for map itself
timer.start(); // start up timer for map itself
}
private enum ReadMode { BY_BYTE, BY_LINE, BY_PARTS, DECODE_LOC, DECODE };
private final double readFile(File f, ReadMode mode) {
startTimer();
timer.start();
try {
byte[] data = new byte[100000];
@ -120,17 +120,7 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
throw new RuntimeException(e);
}
return elapsedTime();
}
private long startTime = -1l;
private void startTimer() {
startTime = System.currentTimeMillis();
}
private double elapsedTime() {
final long curTime = System.currentTimeMillis();
return (curTime - startTime) / 1000.0;
return timer.getElapsedTime();
}
private File getRodFile() {
@ -162,6 +152,6 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
}
public void onTraversalDone(Integer sum) {
out.printf("gatk.traversal\t%d\t%.2f%n", 0, elapsedTime());
out.printf("gatk.traversal\t%d\t%.2f%n", 0, timer.getElapsedTime());
}
}

View File

@ -8,9 +8,11 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.Argument;
@ -32,10 +34,10 @@ public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
protected int maxReadLen = 1000;
@Argument(doc="",required=false)
protected double bw = 1e-3;
protected int bw = 7;
@Argument(doc="",required=false)
protected int mbq = 4;
protected boolean samtoolsMode = false;
@Argument(doc="only operates on reads with this name",required=false)
protected String readName = null;
@ -55,91 +57,142 @@ public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
@Argument(doc="x each read is processed", required=false)
protected int magnification = 1;
@Argument(doc="Profile performance", required=false)
protected boolean profile = false;
int counter = 0;
BAQ baqHMM = null; // matches current samtools parameters
public void initialize() {
baqHMM = new BAQ(bw, 0.1, 7, (byte)mbq);
if ( samtoolsMode )
baqHMM = new BAQ(1e-3, 0.1, bw, (byte)0, true);
else
baqHMM = new BAQ();
}
long goodReads = 0, badReads = 0;
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
if ( (readName == null || readName.equals(read.getReadName())) && read.getReadLength() <= maxReadLen && (includeReadsWithoutBAQTag || BAQ.hasBAQTag(read) ) ) {
if ( baqHMM.excludeReadFromBAQ(read) )
return 1;
byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag);
if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter);
BAQ.BAQCalculationResult baq = null;
for ( int i = 0; i < magnification; i++ )
baq = baqHMM.calcBAQFromHMM(read, refReader);
return 0;
boolean fail = false;
boolean print = false;
int badi = 0;
if ( profile ) {
profileBAQ(ref, read);
} else {
validateBAQ(ref, read);
}
if ( BAQ.hasBAQTag(read) ) {
for ( badi = 0; badi < baqFromTag.length; badi++ ) {
if ( baqFromTag[badi] != baq.bq[badi] ) {
if (MathUtils.arrayMin(read.getBaseQualities()) == 0) {
print = true;
fail = strict;
out.printf(" different, but Q0 base detected%n");
break;
}
else if (readHasSoftClip(read)) {
print = true;
fail = strict;
out.printf(" different, but soft clip detected%n");
break;
} else if (readHasDeletion(read)) {
print = true;
fail = strict;
out.printf(" different, but deletion detected%n");
break;
} else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) {
print = fail = true;
out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual());
break;
} else {
print = fail = true;
break;
}
return 1;
}
return 0;
}
SimpleTimer tagTimer = new SimpleTimer("from.tag");
SimpleTimer baqReadTimer = new SimpleTimer("baq.read");
SimpleTimer glocalTimer = new SimpleTimer("hmm.glocal");
private void profileBAQ(ReferenceContext ref, SAMRecord read) {
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
BAQ.BAQCalculationResult baq = null;
tagTimer.restart();
for ( int i = 0; i < magnification; i++ ) { BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag); }
tagTimer.stop();
baqReadTimer.restart();
for ( int i = 0; i < magnification; i++ ) { baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY ); }
baqReadTimer.stop();
glocalTimer.restart();
BAQ.MAG = magnification;
baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY);
BAQ.MAG = 1;
glocalTimer.stop();
}
private void validateBAQ(ReferenceContext ref, SAMRecord read) {
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag);
if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter);
BAQ.BAQCalculationResult baq = baqHMM.calcBAQFromHMM(read, refReader);
boolean fail = false;
boolean print = false;
int badi = 0;
if ( BAQ.hasBAQTag(read) ) {
for ( badi = 0; badi < baqFromTag.length; badi++ ) {
if ( baqFromTag[badi] != baq.bq[badi] ) {
if ( cigarLength(read) != read.getReadLength() ) {
print = true;
fail = false;
out.printf(" different, but cigar length != read length%n");
break;
}
if (MathUtils.arrayMin(read.getBaseQualities()) == 0) {
print = true;
fail = strict;
out.printf(" different, but Q0 base detected%n");
break;
}
else if (readHasSoftClip(read) && ! samtoolsMode) {
print = true;
fail = strict;
out.printf(" different, but soft clip detected%n");
break;
} else if (readHasDeletion(read) ) { // && ! samtoolsMode) {
print = true;
fail = strict;
out.printf(" different, but deletion detected%n");
break;
} else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) {
print = fail = true;
out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual());
break;
} else {
print = fail = true;
break;
}
}
}
if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) {
byte[] pos = new byte[baq.bq.length];
for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i;
out.printf(" read length : %d%n", read.getReadLength());
out.printf(" read start : %d%n", read.getAlignmentStart());
out.printf(" cigar : %s%n", read.getCigarString());
out.printf(" ref bases : %s%n", new String(baq.refBases));
out.printf(" read bases : %s%n", new String(read.getReadBases()));
out.printf(" ref length : %d%n", baq.refBases.length);
out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG));
if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true);
printQuals(" original quals: ", read.getBaseQualities(), true);
printQuals(" baq quals: ", baq.bq, true);
printQuals(" positions : ", pos, true);
printQuals(" original quals: ", read.getBaseQualities());
if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag);
printQuals(" hmm quals: ", baq.bq);
out.printf(" read bases : %s%n", new String(read.getReadBases()));
out.println(Utils.dupString('-', 80));
}
if ( fail )
throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d",
read.getReadName(), badi, baqFromTag[badi], baq.bq[badi]));
if ( fail || print )
badReads++;
else
goodReads++;
}
return 1;
if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) {
byte[] pos = new byte[baq.bq.length];
for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i;
out.printf(" read length : %d%n", read.getReadLength());
out.printf(" read start : %d (%d unclipped)%n", read.getAlignmentStart(), read.getUnclippedStart());
out.printf(" cigar : %s%n", read.getCigarString());
out.printf(" ref bases : %s%n", new String(baq.refBases));
out.printf(" read bases : %s%n", new String(read.getReadBases()));
out.printf(" ref length : %d%n", baq.refBases.length);
out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG));
if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true);
printQuals(" original quals: ", read.getBaseQualities(), true);
printQuals(" baq quals: ", baq.bq, true);
printQuals(" positions : ", pos, true);
printQuals(" original quals: ", read.getBaseQualities());
if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag);
printQuals(" hmm quals: ", baq.bq);
out.printf(" read bases : %s%n", new String(read.getReadBases()));
out.println(Utils.dupString('-', 80));
}
if ( fail )
throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d",
read.getReadName(), badi, baqFromTag[badi], baq.bq[badi]));
}
private final static boolean readHasSoftClip(SAMRecord read) {
@ -196,10 +249,51 @@ public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
return null;
}
private int cigarLength(SAMRecord read) {
int readI = 0;
for ( CigarElement elt : read.getCigar().getCigarElements() ) {
int l = elt.getLength();
switch (elt.getOperator()) {
case N: // cannot handle these
return 0;
case H : case P : // ignore pads and hard clips
break;
case S :
case I :
readI += l;
break;
case D : break;
case M :
readI += l;
break;
default:
throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName());
}
}
return readI;
}
public Integer reduceInit() { return 0; }
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
public void onTraversalDone(Integer nreads) {
if ( profile ) {
out.printf("n.reads baq.per.read calculation time.in.secs%n");
printTimer(nreads, tagTimer);
printTimer(nreads, glocalTimer);
printTimer(nreads, baqReadTimer);
} else {
out.printf("total reads BAQ'd %d; concordant BAQ reads %d %.4f; discordant BAQ reads %d %.4f%n", nreads,
goodReads, (100.0 * goodReads) / nreads,
badReads, (100.0 * badReads) / nreads);
}
}
private final void printTimer(int nreads, SimpleTimer timer) {
out.printf("%d %d %s %.2f%n", nreads, magnification, timer.getName(), timer.getElapsedTime());
}
}

View File

@ -0,0 +1,59 @@
package org.broadinstitute.sting.utils;
import java.io.PrintStream;
/**
* A useful simple system for timing code.
*
* User: depristo
* Date: Dec 10, 2010
* Time: 9:07:44 AM
*/
public class SimpleTimer {
private String name = "";
private long elapsed = 0l;
private long startTime = -1l;
boolean running = false;
public SimpleTimer(String name) {
this.name = name;
}
public String getName() {
return name;
}
public SimpleTimer start() {
elapsed = 0l;
restart();
return this;
}
public SimpleTimer restart() {
running = true;
startTime = currentTime();
return this;
}
public long currentTime() {
return System.currentTimeMillis();
}
public SimpleTimer stop() {
running = false;
elapsed += currentTime() - startTime;
return this;
}
public double getElapsedTime() {
if ( running )
return (currentTime() - startTime) / 1000.0;
else
return elapsed;
}
public void printElapsedTime(PrintStream out) {
out.printf("SimpleTimer %s: %.2f%n", name, getElapsedTime());
}
}

View File

@ -6,9 +6,6 @@ import net.sf.samtools.CigarOperator;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.Arrays;
/*
The topology of the profile HMM:
@ -39,7 +36,7 @@ public class BAQ {
private final static boolean DEBUG = false;
public enum CalculationMode {
NONE, // don't apply a BAQ at all, the default
OFF, // don't apply a BAQ at all, the default
CALCULATE_AS_NECESSARY, // do HMM BAQ calculation on the fly, as necessary, if there's no tag
RECALCULATE // do HMM BAQ calculation on the fly, regardless of whether there's a tag present
}
@ -66,11 +63,12 @@ public class BAQ {
qual2prob[i] = Math.pow(10, -i/10.);
}
public static double DEFAULT_GOP = 1e-3; // todo -- make me final private
public static double DEFAULT_GOP = 1e-4;
public double cd = -1; // gap open probility [1e-3]
private double ce = 0.1; // gap extension probability [0.1]
private int cb = 7; // band width [7]
private boolean includeClippedBases = false;
public byte getMinBaseQual() {
return minBaseQual;
@ -109,8 +107,10 @@ public class BAQ {
* @param b band width
* @param minBaseQual All bases with Q < minBaseQual are up'd to this value
*/
public BAQ(final double d, final double e, final int b, final byte minBaseQual) {
cd = d; ce = e; cb = b; this.minBaseQual = minBaseQual;
public BAQ(final double d, final double e, final int b, final byte minBaseQual, boolean includeClippedBases) {
cd = d; ce = e; cb = b;
this.minBaseQual = minBaseQual;
this.includeClippedBases = includeClippedBases;
initializeCachedData();
}
@ -142,12 +142,6 @@ public class BAQ {
return EPSILONS[ref][read][qualB];
}
// private double calcEpsilon( byte ref, byte read, byte qualB ) {
// double qual = qual2prob[qualB < minBaseQual ? minBaseQual : qualB];
// return (ref > 3 || read > 3)? 1. : ref == read ? 1. - qual : qual * EM;
// }
// ####################################################################################################
//
// NOTE -- THIS CODE IS SYNCHRONIZED WITH CODE IN THE SAMTOOLS REPOSITORY. CHANGES TO THIS CODE SHOULD BE
@ -169,12 +163,21 @@ public class BAQ {
/*** initialization ***/
// change coordinates
int l_ref = ref.length;
//int l_query = query.length;
// set band width
int bw2, bw = l_ref > l_query? l_ref : l_query;
if (bw > cb) bw = cb;
if (bw < Math.abs(l_ref - l_query)) bw = Math.abs(l_ref - l_query);
if (cb < Math.abs(l_ref - l_query)) {
bw = Math.abs(l_ref - l_query) + 3;
//System.out.printf("SC cb=%d, bw=%d%n", cb, bw);
}
if (bw > cb) bw = cb;
if (bw < Math.abs(l_ref - l_query)) {
//int bwOld = bw;
bw = Math.abs(l_ref - l_query);
//System.out.printf("old bw is %d, new is %d%n", bwOld, bw);
}
//System.out.printf("c->bw = %d, bw = %d, l_ref = %d, l_query = %d\n", cb, bw, l_ref, l_query);
bw2 = bw * 2 + 1;
// allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
@ -185,12 +188,14 @@ public class BAQ {
// initialize transition probabilities
double sM, sI, bM, bI;
sM = sI = 1. / (2 * l_query + 2);
bM = (1 - cd) / l_query; bI = cd / l_query; // (bM+bI)*l_query==1
bM = (1 - cd) / l_ref; bI = cd / l_ref; // (bM+bI)*l_ref==1
double[] m = new double[9];
m[0*3+0] = (1 - cd - cd) * (1 - sM); m[0*3+1] = m[0*3+2] = cd * (1 - sM);
m[1*3+0] = (1 - ce) * (1 - sI); m[1*3+1] = ce * (1 - sI); m[1*3+2] = 0.;
m[2*3+0] = 1 - ce; m[2*3+1] = 0.; m[2*3+2] = ce;
/*** forward ***/
// f[0]
f[0][set_u(bw, 0, 0)] = s[0] = 1.;
@ -274,8 +279,7 @@ public class BAQ {
for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y;
}
// TODO -- this appears to be a null operation overall. For debugging only?
double pb;
double pb;
{ // b[0]
int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1;
double sum = 0.;
@ -288,6 +292,7 @@ public class BAQ {
pb = b[0][set_u(bw, 0, 0)] = sum / s[0]; // if everything works as is expected, pb == 1.0
}
/*** MAP ***/
for (i = 1; i <= l_query; ++i) {
double sum = 0., max = 0.;
@ -450,30 +455,34 @@ public class BAQ {
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, IndexedFastaSequenceFile refReader) {
// start is alignment start - band width / 2 - size of first I element, if there is one. Stop is similar
int offset = getBandWidth() / 2;
long start = Math.max(read.getAlignmentStart() - offset - getFirstInsertionOffset(read), 0);
long stop = read.getAlignmentEnd() + offset + getLastInsertionOffset(read);
long readStart = includeClippedBases ? read.getUnclippedStart() : read.getAlignmentStart();
long start = Math.max(readStart - offset - getFirstInsertionOffset(read), 0);
long stop = (includeClippedBases ? read.getUnclippedEnd() : read.getAlignmentEnd()) + offset + getLastInsertionOffset(read);
if ( stop > refReader.getSequenceDictionary().getSequence(read.getReferenceName()).getSequenceLength() ) {
return null;
} else {
// now that we have the start and stop, get the reference sequence covering it
ReferenceSequence refSeq = refReader.getSubsequenceAt(read.getReferenceName(), start, stop);
return calcBAQFromHMM(read, refSeq.getBases(), (int)(start - read.getAlignmentStart()));
return calcBAQFromHMM(read, refSeq.getBases(), (int)(start - readStart));
}
}
public static int MAG = 1; // todo -- remove me for performance testing only
public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) {
// note -- assumes ref is offset from the *CLIPPED* start
BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref);
int queryLen = queryEnd - queryStart;
hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
for ( int i = 0; i < MAG; i++ )
hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
return baqResult;
}
// we need to bad ref by at least the bandwidth / 2 on either side
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, byte[] ref, int refOffset) {
int queryStart = (int)(read.getAlignmentStart() - read.getUnclippedStart());
int queryEnd = (int)(read.getReadLength() - (read.getUnclippedEnd() - read.getAlignmentEnd()));
// todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
int queryStart = includeClippedBases ? 0 : read.getAlignmentStart() - read.getUnclippedStart();
int queryEnd = read.getReadLength() - (includeClippedBases ? 0 : read.getUnclippedEnd() - read.getAlignmentEnd());
BAQCalculationResult baqResult = calcBAQFromHMM(ref, read.getReadBases(), read.getBaseQualities(), queryStart, queryEnd);
// cap quals
@ -485,7 +494,8 @@ public class BAQ {
return null;
case H : case P : // ignore pads and hard clips
break;
case I : case S :
case S : refI += l; // move the reference too, in addition to I
case I :
// todo -- is it really the case that we want to treat I and S the same?
for ( int i = readI; i < readI + l; i++ ) baqResult.bq[i] = baqResult.rawQuals[i];
readI += l;
@ -502,6 +512,8 @@ public class BAQ {
throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName());
}
}
if ( readI != read.getReadLength() ) // odd cigar string
System.arraycopy(baqResult.rawQuals, 0, baqResult.bq, 0, baqResult.bq.length);
return baqResult;
}
@ -532,7 +544,7 @@ public class BAQ {
if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName());
byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them
if ( calculationType == CalculationMode.NONE ) { // we don't want to do anything
if ( calculationType == CalculationMode.OFF) { // we don't want to do anything
; // just fall though
} else if ( excludeReadFromBAQ(read) ) {
; // just fall through

View File

@ -28,7 +28,7 @@ public class BAQSamIterator implements StingSAMIterator {
* @param qmode
*/
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
if ( cmode == BAQ.CalculationMode.NONE ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode NONE");
if ( cmode == BAQ.CalculationMode.OFF) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
this.refReader = refReader;

View File

@ -148,7 +148,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
new ArrayList<SamRecordFilter>(),
false,
false,
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
);
}
}