Final version of BAQ calculation. default gap open is 1e-4, a good sensitive value. Useful timer class SimpleTimer added. BAQ is now live.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4818 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
491a599b59
commit
5b46a900b3
|
|
@ -548,7 +548,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
|
||||
DownsamplingMethod method = getDownsamplingMethod();
|
||||
|
||||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.NONE )
|
||||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
return new SAMDataSource(
|
||||
|
|
@ -562,7 +562,7 @@ public abstract class AbstractGenomeAnalysisEngine {
|
|||
filters,
|
||||
includeReadsWithDeletionAtLoci(),
|
||||
generateExtendedEvents(),
|
||||
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.NONE,
|
||||
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
|
||||
getWalkerBAQQualityMode(),
|
||||
refReader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class ReadProperties {
|
|||
private boolean includeReadsWithDeletionAtLoci = false;
|
||||
private boolean useOriginalBaseQualities = false;
|
||||
private boolean generateExtendedEvents = false;
|
||||
private BAQ.CalculationMode cmode = BAQ.CalculationMode.NONE;
|
||||
private BAQ.CalculationMode cmode = BAQ.CalculationMode.OFF;
|
||||
private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY;
|
||||
IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
|
|
@ -153,12 +152,11 @@ public class GATKArgumentCollection {
|
|||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
|
||||
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.NONE;
|
||||
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF;
|
||||
|
||||
@Element(required = false)
|
||||
@Hidden
|
||||
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="Gap open penalty. For testing purposes only", required = false)
|
||||
public double BAQGOP = 1e-3; // todo remove me
|
||||
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty. Default value is 1e-4. 1e-3 is perhaps better for whole genome call sets", required = false)
|
||||
public double BAQGOP = BAQ.DEFAULT_GOP;
|
||||
|
||||
/**
|
||||
* Gets the default downsampling method, returned if the user didn't specify any downsampling
|
||||
|
|
@ -348,8 +346,9 @@ public class GATKArgumentCollection {
|
|||
}
|
||||
if (BTIMergeRule != other.BTIMergeRule)
|
||||
return false;
|
||||
if ( BAQMode != other.BAQMode)
|
||||
return false;
|
||||
|
||||
if ( BAQMode != other.BAQMode) return false;
|
||||
if ( BAQGOP != other.BAQGOP ) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
supplementalFilters,
|
||||
includeReadsWithDeletionAtLoci,
|
||||
generateExtendedEvents,
|
||||
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
|
||||
BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -592,7 +592,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
if (!noValidationOfReadOrder && enableVerification)
|
||||
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
|
||||
|
||||
if (cmode != BAQ.CalculationMode.NONE)
|
||||
if (cmode != BAQ.CalculationMode.OFF)
|
||||
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
|
||||
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
* @{inheritDoc}
|
||||
*/
|
||||
public void addAlignment( SAMRecord alignment ) {
|
||||
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.NONE && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
|
||||
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.OFF && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
|
||||
//System.out.printf("Writing BAQ at OUTPUT TIME%n");
|
||||
baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -53,6 +54,7 @@ import java.io.PrintStream;
|
|||
@Reference(window=@Window(start=-1,stop=50))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@By(DataSource.REFERENCE)
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.Event> {
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
|
|
|||
|
|
@ -28,18 +28,16 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
|
|
@ -60,6 +58,8 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
|
||||
int MAX_RECORDS = -1;
|
||||
|
||||
SimpleTimer timer = new SimpleTimer("myTimer");
|
||||
|
||||
public void initialize() {
|
||||
File rodFile = getRodFile();
|
||||
|
||||
|
|
@ -75,13 +75,13 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
|
|||
out.printf("full.decode\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE));
|
||||
}
|
||||
|
||||
startTimer(); // start up timer for map itself
|
||||
timer.start(); // start up timer for map itself
|
||||
}
|
||||
|
||||
private enum ReadMode { BY_BYTE, BY_LINE, BY_PARTS, DECODE_LOC, DECODE };
|
||||
|
||||
private final double readFile(File f, ReadMode mode) {
|
||||
startTimer();
|
||||
timer.start();
|
||||
|
||||
try {
|
||||
byte[] data = new byte[100000];
|
||||
|
|
@ -120,17 +120,7 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
|
|||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return elapsedTime();
|
||||
}
|
||||
|
||||
private long startTime = -1l;
|
||||
private void startTimer() {
|
||||
startTime = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
private double elapsedTime() {
|
||||
final long curTime = System.currentTimeMillis();
|
||||
return (curTime - startTime) / 1000.0;
|
||||
return timer.getElapsedTime();
|
||||
}
|
||||
|
||||
private File getRodFile() {
|
||||
|
|
@ -162,6 +152,6 @@ public class ProfileRodSystem extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
out.printf("gatk.traversal\t%d\t%.2f%n", 0, elapsedTime());
|
||||
out.printf("gatk.traversal\t%d\t%.2f%n", 0, timer.getElapsedTime());
|
||||
}
|
||||
}
|
||||
|
|
@ -8,9 +8,11 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
|
|
@ -32,10 +34,10 @@ public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
|
|||
protected int maxReadLen = 1000;
|
||||
|
||||
@Argument(doc="",required=false)
|
||||
protected double bw = 1e-3;
|
||||
protected int bw = 7;
|
||||
|
||||
@Argument(doc="",required=false)
|
||||
protected int mbq = 4;
|
||||
protected boolean samtoolsMode = false;
|
||||
|
||||
@Argument(doc="only operates on reads with this name",required=false)
|
||||
protected String readName = null;
|
||||
|
|
@ -55,91 +57,142 @@ public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
|
|||
@Argument(doc="x each read is processed", required=false)
|
||||
protected int magnification = 1;
|
||||
|
||||
@Argument(doc="Profile performance", required=false)
|
||||
protected boolean profile = false;
|
||||
|
||||
int counter = 0;
|
||||
|
||||
BAQ baqHMM = null; // matches current samtools parameters
|
||||
|
||||
public void initialize() {
|
||||
baqHMM = new BAQ(bw, 0.1, 7, (byte)mbq);
|
||||
if ( samtoolsMode )
|
||||
baqHMM = new BAQ(1e-3, 0.1, bw, (byte)0, true);
|
||||
else
|
||||
baqHMM = new BAQ();
|
||||
}
|
||||
|
||||
long goodReads = 0, badReads = 0;
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
|
||||
|
||||
if ( (readName == null || readName.equals(read.getReadName())) && read.getReadLength() <= maxReadLen && (includeReadsWithoutBAQTag || BAQ.hasBAQTag(read) ) ) {
|
||||
if ( baqHMM.excludeReadFromBAQ(read) )
|
||||
return 1;
|
||||
byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag);
|
||||
if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter);
|
||||
BAQ.BAQCalculationResult baq = null;
|
||||
|
||||
for ( int i = 0; i < magnification; i++ )
|
||||
baq = baqHMM.calcBAQFromHMM(read, refReader);
|
||||
return 0;
|
||||
|
||||
boolean fail = false;
|
||||
boolean print = false;
|
||||
int badi = 0;
|
||||
if ( profile ) {
|
||||
profileBAQ(ref, read);
|
||||
} else {
|
||||
validateBAQ(ref, read);
|
||||
}
|
||||
|
||||
if ( BAQ.hasBAQTag(read) ) {
|
||||
for ( badi = 0; badi < baqFromTag.length; badi++ ) {
|
||||
if ( baqFromTag[badi] != baq.bq[badi] ) {
|
||||
if (MathUtils.arrayMin(read.getBaseQualities()) == 0) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but Q0 base detected%n");
|
||||
break;
|
||||
}
|
||||
else if (readHasSoftClip(read)) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but soft clip detected%n");
|
||||
break;
|
||||
} else if (readHasDeletion(read)) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but deletion detected%n");
|
||||
break;
|
||||
} else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) {
|
||||
print = fail = true;
|
||||
out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual());
|
||||
break;
|
||||
} else {
|
||||
print = fail = true;
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SimpleTimer tagTimer = new SimpleTimer("from.tag");
|
||||
SimpleTimer baqReadTimer = new SimpleTimer("baq.read");
|
||||
SimpleTimer glocalTimer = new SimpleTimer("hmm.glocal");
|
||||
|
||||
private void profileBAQ(ReferenceContext ref, SAMRecord read) {
|
||||
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
|
||||
BAQ.BAQCalculationResult baq = null;
|
||||
|
||||
tagTimer.restart();
|
||||
for ( int i = 0; i < magnification; i++ ) { BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag); }
|
||||
tagTimer.stop();
|
||||
|
||||
baqReadTimer.restart();
|
||||
for ( int i = 0; i < magnification; i++ ) { baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY ); }
|
||||
baqReadTimer.stop();
|
||||
|
||||
glocalTimer.restart();
|
||||
BAQ.MAG = magnification;
|
||||
baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY);
|
||||
BAQ.MAG = 1;
|
||||
glocalTimer.stop();
|
||||
}
|
||||
|
||||
|
||||
private void validateBAQ(ReferenceContext ref, SAMRecord read) {
|
||||
IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
|
||||
byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag);
|
||||
if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter);
|
||||
|
||||
BAQ.BAQCalculationResult baq = baqHMM.calcBAQFromHMM(read, refReader);
|
||||
|
||||
boolean fail = false;
|
||||
boolean print = false;
|
||||
int badi = 0;
|
||||
|
||||
if ( BAQ.hasBAQTag(read) ) {
|
||||
for ( badi = 0; badi < baqFromTag.length; badi++ ) {
|
||||
if ( baqFromTag[badi] != baq.bq[badi] ) {
|
||||
if ( cigarLength(read) != read.getReadLength() ) {
|
||||
print = true;
|
||||
fail = false;
|
||||
out.printf(" different, but cigar length != read length%n");
|
||||
break;
|
||||
}
|
||||
if (MathUtils.arrayMin(read.getBaseQualities()) == 0) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but Q0 base detected%n");
|
||||
break;
|
||||
}
|
||||
else if (readHasSoftClip(read) && ! samtoolsMode) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but soft clip detected%n");
|
||||
break;
|
||||
} else if (readHasDeletion(read) ) { // && ! samtoolsMode) {
|
||||
print = true;
|
||||
fail = strict;
|
||||
out.printf(" different, but deletion detected%n");
|
||||
break;
|
||||
} else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) {
|
||||
print = fail = true;
|
||||
out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual());
|
||||
break;
|
||||
} else {
|
||||
print = fail = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) {
|
||||
byte[] pos = new byte[baq.bq.length];
|
||||
for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i;
|
||||
|
||||
out.printf(" read length : %d%n", read.getReadLength());
|
||||
out.printf(" read start : %d%n", read.getAlignmentStart());
|
||||
out.printf(" cigar : %s%n", read.getCigarString());
|
||||
out.printf(" ref bases : %s%n", new String(baq.refBases));
|
||||
out.printf(" read bases : %s%n", new String(read.getReadBases()));
|
||||
out.printf(" ref length : %d%n", baq.refBases.length);
|
||||
out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG));
|
||||
if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true);
|
||||
printQuals(" original quals: ", read.getBaseQualities(), true);
|
||||
printQuals(" baq quals: ", baq.bq, true);
|
||||
printQuals(" positions : ", pos, true);
|
||||
printQuals(" original quals: ", read.getBaseQualities());
|
||||
if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag);
|
||||
printQuals(" hmm quals: ", baq.bq);
|
||||
out.printf(" read bases : %s%n", new String(read.getReadBases()));
|
||||
out.println(Utils.dupString('-', 80));
|
||||
}
|
||||
|
||||
|
||||
if ( fail )
|
||||
throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d",
|
||||
read.getReadName(), badi, baqFromTag[badi], baq.bq[badi]));
|
||||
if ( fail || print )
|
||||
badReads++;
|
||||
else
|
||||
goodReads++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) {
|
||||
byte[] pos = new byte[baq.bq.length];
|
||||
for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i;
|
||||
|
||||
out.printf(" read length : %d%n", read.getReadLength());
|
||||
out.printf(" read start : %d (%d unclipped)%n", read.getAlignmentStart(), read.getUnclippedStart());
|
||||
out.printf(" cigar : %s%n", read.getCigarString());
|
||||
out.printf(" ref bases : %s%n", new String(baq.refBases));
|
||||
out.printf(" read bases : %s%n", new String(read.getReadBases()));
|
||||
out.printf(" ref length : %d%n", baq.refBases.length);
|
||||
out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG));
|
||||
if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true);
|
||||
printQuals(" original quals: ", read.getBaseQualities(), true);
|
||||
printQuals(" baq quals: ", baq.bq, true);
|
||||
printQuals(" positions : ", pos, true);
|
||||
printQuals(" original quals: ", read.getBaseQualities());
|
||||
if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag);
|
||||
printQuals(" hmm quals: ", baq.bq);
|
||||
out.printf(" read bases : %s%n", new String(read.getReadBases()));
|
||||
out.println(Utils.dupString('-', 80));
|
||||
}
|
||||
|
||||
|
||||
if ( fail )
|
||||
throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d",
|
||||
read.getReadName(), badi, baqFromTag[badi], baq.bq[badi]));
|
||||
}
|
||||
|
||||
private final static boolean readHasSoftClip(SAMRecord read) {
|
||||
|
|
@ -196,10 +249,51 @@ public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
|
|||
return null;
|
||||
}
|
||||
|
||||
private int cigarLength(SAMRecord read) {
|
||||
int readI = 0;
|
||||
for ( CigarElement elt : read.getCigar().getCigarElements() ) {
|
||||
int l = elt.getLength();
|
||||
switch (elt.getOperator()) {
|
||||
case N: // cannot handle these
|
||||
return 0;
|
||||
case H : case P : // ignore pads and hard clips
|
||||
break;
|
||||
case S :
|
||||
case I :
|
||||
readI += l;
|
||||
break;
|
||||
case D : break;
|
||||
case M :
|
||||
readI += l;
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName());
|
||||
}
|
||||
}
|
||||
return readI;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer nreads) {
|
||||
if ( profile ) {
|
||||
out.printf("n.reads baq.per.read calculation time.in.secs%n");
|
||||
printTimer(nreads, tagTimer);
|
||||
printTimer(nreads, glocalTimer);
|
||||
printTimer(nreads, baqReadTimer);
|
||||
} else {
|
||||
out.printf("total reads BAQ'd %d; concordant BAQ reads %d %.4f; discordant BAQ reads %d %.4f%n", nreads,
|
||||
goodReads, (100.0 * goodReads) / nreads,
|
||||
badReads, (100.0 * badReads) / nreads);
|
||||
}
|
||||
}
|
||||
|
||||
private final void printTimer(int nreads, SimpleTimer timer) {
|
||||
out.printf("%d %d %s %.2f%n", nreads, magnification, timer.getName(), timer.getElapsedTime());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* A useful simple system for timing code.
|
||||
*
|
||||
* User: depristo
|
||||
* Date: Dec 10, 2010
|
||||
* Time: 9:07:44 AM
|
||||
*/
|
||||
public class SimpleTimer {
|
||||
private String name = "";
|
||||
private long elapsed = 0l;
|
||||
private long startTime = -1l;
|
||||
boolean running = false;
|
||||
|
||||
public SimpleTimer(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public SimpleTimer start() {
|
||||
elapsed = 0l;
|
||||
restart();
|
||||
return this;
|
||||
}
|
||||
|
||||
public SimpleTimer restart() {
|
||||
running = true;
|
||||
startTime = currentTime();
|
||||
return this;
|
||||
}
|
||||
|
||||
public long currentTime() {
|
||||
return System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public SimpleTimer stop() {
|
||||
running = false;
|
||||
elapsed += currentTime() - startTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public double getElapsedTime() {
|
||||
if ( running )
|
||||
return (currentTime() - startTime) / 1000.0;
|
||||
else
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
|
||||
public void printElapsedTime(PrintStream out) {
|
||||
out.printf("SimpleTimer %s: %.2f%n", name, getElapsedTime());
|
||||
}
|
||||
}
|
||||
|
|
@ -6,9 +6,6 @@ import net.sf.samtools.CigarOperator;
|
|||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
The topology of the profile HMM:
|
||||
|
|
@ -39,7 +36,7 @@ public class BAQ {
|
|||
private final static boolean DEBUG = false;
|
||||
|
||||
public enum CalculationMode {
|
||||
NONE, // don't apply a BAQ at all, the default
|
||||
OFF, // don't apply a BAQ at all, the default
|
||||
CALCULATE_AS_NECESSARY, // do HMM BAQ calculation on the fly, as necessary, if there's no tag
|
||||
RECALCULATE // do HMM BAQ calculation on the fly, regardless of whether there's a tag present
|
||||
}
|
||||
|
|
@ -66,11 +63,12 @@ public class BAQ {
|
|||
qual2prob[i] = Math.pow(10, -i/10.);
|
||||
}
|
||||
|
||||
public static double DEFAULT_GOP = 1e-3; // todo -- make me final private
|
||||
public static double DEFAULT_GOP = 1e-4;
|
||||
|
||||
public double cd = -1; // gap open probility [1e-3]
|
||||
private double ce = 0.1; // gap extension probability [0.1]
|
||||
private int cb = 7; // band width [7]
|
||||
private boolean includeClippedBases = false;
|
||||
|
||||
public byte getMinBaseQual() {
|
||||
return minBaseQual;
|
||||
|
|
@ -109,8 +107,10 @@ public class BAQ {
|
|||
* @param b band width
|
||||
* @param minBaseQual All bases with Q < minBaseQual are up'd to this value
|
||||
*/
|
||||
public BAQ(final double d, final double e, final int b, final byte minBaseQual) {
|
||||
cd = d; ce = e; cb = b; this.minBaseQual = minBaseQual;
|
||||
public BAQ(final double d, final double e, final int b, final byte minBaseQual, boolean includeClippedBases) {
|
||||
cd = d; ce = e; cb = b;
|
||||
this.minBaseQual = minBaseQual;
|
||||
this.includeClippedBases = includeClippedBases;
|
||||
initializeCachedData();
|
||||
}
|
||||
|
||||
|
|
@ -142,12 +142,6 @@ public class BAQ {
|
|||
return EPSILONS[ref][read][qualB];
|
||||
}
|
||||
|
||||
// private double calcEpsilon( byte ref, byte read, byte qualB ) {
|
||||
// double qual = qual2prob[qualB < minBaseQual ? minBaseQual : qualB];
|
||||
// return (ref > 3 || read > 3)? 1. : ref == read ? 1. - qual : qual * EM;
|
||||
// }
|
||||
|
||||
|
||||
// ####################################################################################################
|
||||
//
|
||||
// NOTE -- THIS CODE IS SYNCHRONIZED WITH CODE IN THE SAMTOOLS REPOSITORY. CHANGES TO THIS CODE SHOULD BE
|
||||
|
|
@ -169,12 +163,21 @@ public class BAQ {
|
|||
/*** initialization ***/
|
||||
// change coordinates
|
||||
int l_ref = ref.length;
|
||||
//int l_query = query.length;
|
||||
|
||||
|
||||
// set band width
|
||||
int bw2, bw = l_ref > l_query? l_ref : l_query;
|
||||
if (bw > cb) bw = cb;
|
||||
if (bw < Math.abs(l_ref - l_query)) bw = Math.abs(l_ref - l_query);
|
||||
if (cb < Math.abs(l_ref - l_query)) {
|
||||
bw = Math.abs(l_ref - l_query) + 3;
|
||||
//System.out.printf("SC cb=%d, bw=%d%n", cb, bw);
|
||||
}
|
||||
if (bw > cb) bw = cb;
|
||||
if (bw < Math.abs(l_ref - l_query)) {
|
||||
//int bwOld = bw;
|
||||
bw = Math.abs(l_ref - l_query);
|
||||
//System.out.printf("old bw is %d, new is %d%n", bwOld, bw);
|
||||
}
|
||||
//System.out.printf("c->bw = %d, bw = %d, l_ref = %d, l_query = %d\n", cb, bw, l_ref, l_query);
|
||||
bw2 = bw * 2 + 1;
|
||||
|
||||
// allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
|
||||
|
|
@ -185,12 +188,14 @@ public class BAQ {
|
|||
// initialize transition probabilities
|
||||
double sM, sI, bM, bI;
|
||||
sM = sI = 1. / (2 * l_query + 2);
|
||||
bM = (1 - cd) / l_query; bI = cd / l_query; // (bM+bI)*l_query==1
|
||||
bM = (1 - cd) / l_ref; bI = cd / l_ref; // (bM+bI)*l_ref==1
|
||||
|
||||
double[] m = new double[9];
|
||||
m[0*3+0] = (1 - cd - cd) * (1 - sM); m[0*3+1] = m[0*3+2] = cd * (1 - sM);
|
||||
m[1*3+0] = (1 - ce) * (1 - sI); m[1*3+1] = ce * (1 - sI); m[1*3+2] = 0.;
|
||||
m[2*3+0] = 1 - ce; m[2*3+1] = 0.; m[2*3+2] = ce;
|
||||
|
||||
|
||||
/*** forward ***/
|
||||
// f[0]
|
||||
f[0][set_u(bw, 0, 0)] = s[0] = 1.;
|
||||
|
|
@ -274,8 +279,7 @@ public class BAQ {
|
|||
for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y;
|
||||
}
|
||||
|
||||
// TODO -- this appears to be a null operation overall. For debugging only?
|
||||
double pb;
|
||||
double pb;
|
||||
{ // b[0]
|
||||
int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1;
|
||||
double sum = 0.;
|
||||
|
|
@ -288,6 +292,7 @@ public class BAQ {
|
|||
pb = b[0][set_u(bw, 0, 0)] = sum / s[0]; // if everything works as is expected, pb == 1.0
|
||||
}
|
||||
|
||||
|
||||
/*** MAP ***/
|
||||
for (i = 1; i <= l_query; ++i) {
|
||||
double sum = 0., max = 0.;
|
||||
|
|
@ -450,30 +455,34 @@ public class BAQ {
|
|||
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, IndexedFastaSequenceFile refReader) {
|
||||
// start is alignment start - band width / 2 - size of first I element, if there is one. Stop is similar
|
||||
int offset = getBandWidth() / 2;
|
||||
long start = Math.max(read.getAlignmentStart() - offset - getFirstInsertionOffset(read), 0);
|
||||
long stop = read.getAlignmentEnd() + offset + getLastInsertionOffset(read);
|
||||
long readStart = includeClippedBases ? read.getUnclippedStart() : read.getAlignmentStart();
|
||||
long start = Math.max(readStart - offset - getFirstInsertionOffset(read), 0);
|
||||
long stop = (includeClippedBases ? read.getUnclippedEnd() : read.getAlignmentEnd()) + offset + getLastInsertionOffset(read);
|
||||
|
||||
if ( stop > refReader.getSequenceDictionary().getSequence(read.getReferenceName()).getSequenceLength() ) {
|
||||
return null;
|
||||
} else {
|
||||
// now that we have the start and stop, get the reference sequence covering it
|
||||
ReferenceSequence refSeq = refReader.getSubsequenceAt(read.getReferenceName(), start, stop);
|
||||
return calcBAQFromHMM(read, refSeq.getBases(), (int)(start - read.getAlignmentStart()));
|
||||
return calcBAQFromHMM(read, refSeq.getBases(), (int)(start - readStart));
|
||||
}
|
||||
}
|
||||
|
||||
public static int MAG = 1; // todo -- remove me for performance testing only
|
||||
public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) {
|
||||
// note -- assumes ref is offset from the *CLIPPED* start
|
||||
BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref);
|
||||
int queryLen = queryEnd - queryStart;
|
||||
hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
|
||||
for ( int i = 0; i < MAG; i++ )
|
||||
hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
|
||||
return baqResult;
|
||||
}
|
||||
|
||||
// we need to bad ref by at least the bandwidth / 2 on either side
|
||||
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, byte[] ref, int refOffset) {
|
||||
int queryStart = (int)(read.getAlignmentStart() - read.getUnclippedStart());
|
||||
int queryEnd = (int)(read.getReadLength() - (read.getUnclippedEnd() - read.getAlignmentEnd()));
|
||||
// todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
|
||||
int queryStart = includeClippedBases ? 0 : read.getAlignmentStart() - read.getUnclippedStart();
|
||||
int queryEnd = read.getReadLength() - (includeClippedBases ? 0 : read.getUnclippedEnd() - read.getAlignmentEnd());
|
||||
BAQCalculationResult baqResult = calcBAQFromHMM(ref, read.getReadBases(), read.getBaseQualities(), queryStart, queryEnd);
|
||||
|
||||
// cap quals
|
||||
|
|
@ -485,7 +494,8 @@ public class BAQ {
|
|||
return null;
|
||||
case H : case P : // ignore pads and hard clips
|
||||
break;
|
||||
case I : case S :
|
||||
case S : refI += l; // move the reference too, in addition to I
|
||||
case I :
|
||||
// todo -- is it really the case that we want to treat I and S the same?
|
||||
for ( int i = readI; i < readI + l; i++ ) baqResult.bq[i] = baqResult.rawQuals[i];
|
||||
readI += l;
|
||||
|
|
@ -502,6 +512,8 @@ public class BAQ {
|
|||
throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName());
|
||||
}
|
||||
}
|
||||
if ( readI != read.getReadLength() ) // odd cigar string
|
||||
System.arraycopy(baqResult.rawQuals, 0, baqResult.bq, 0, baqResult.bq.length);
|
||||
|
||||
return baqResult;
|
||||
}
|
||||
|
|
@ -532,7 +544,7 @@ public class BAQ {
|
|||
if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName());
|
||||
|
||||
byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them
|
||||
if ( calculationType == CalculationMode.NONE ) { // we don't want to do anything
|
||||
if ( calculationType == CalculationMode.OFF) { // we don't want to do anything
|
||||
; // just fall though
|
||||
} else if ( excludeReadFromBAQ(read) ) {
|
||||
; // just fall through
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class BAQSamIterator implements StingSAMIterator {
|
|||
* @param qmode
|
||||
*/
|
||||
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
|
||||
if ( cmode == BAQ.CalculationMode.NONE ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode NONE");
|
||||
if ( cmode == BAQ.CalculationMode.OFF) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
|
||||
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
|
||||
|
||||
this.refReader = refReader;
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
new ArrayList<SamRecordFilter>(),
|
||||
false,
|
||||
false,
|
||||
BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
|
||||
BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null // no BAQ
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue