Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
902ca7ea70
|
|
@ -25,13 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.threading.ThreadLocalArray;
|
import org.broadinstitute.sting.utils.threading.ThreadLocalArray;
|
||||||
|
|
||||||
|
|
@ -47,13 +45,12 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
|
||||||
if( !skip[offset] ) {
|
|
||||||
final ReadCovariates readCovariates = covariateKeySetFrom(read);
|
final ReadCovariates readCovariates = covariateKeySetFrom(read);
|
||||||
|
|
||||||
byte[] tempQualArray = threadLocalTempQualArray.get();
|
byte[] tempQualArray = threadLocalTempQualArray.get();
|
||||||
double[] tempFractionalErrorArray = threadLocalTempFractionalErrorArray.get();
|
double[] tempFractionalErrorArray = threadLocalTempFractionalErrorArray.get();
|
||||||
|
|
||||||
|
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||||
|
if( !skip[offset] ) {
|
||||||
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
|
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
|
||||||
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
|
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
|
||||||
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
|
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
|
||||||
|
|
@ -67,8 +64,6 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
||||||
final byte qual = tempQualArray[eventIndex];
|
final byte qual = tempQualArray[eventIndex];
|
||||||
final double isError = tempFractionalErrorArray[eventIndex];
|
final double isError = tempFractionalErrorArray[eventIndex];
|
||||||
|
|
||||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
|
||||||
|
|
||||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||||
|
|
||||||
for (int i = 2; i < covariates.length; i++) {
|
for (int i = 2; i < covariates.length; i++) {
|
||||||
|
|
|
||||||
|
|
@ -23,17 +23,17 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.SeekableBufferedStream;
|
||||||
|
import org.broad.tribble.util.SeekableFileStream;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -68,6 +68,9 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private final File mFile;
|
private final File mFile;
|
||||||
|
|
||||||
|
//TODO: figure out a good value for this buffer size
|
||||||
|
private final int BUFFERED_STREAM_BUFFER_SIZE=8192;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of sequences stored in this index.
|
* Number of sequences stored in this index.
|
||||||
*/
|
*/
|
||||||
|
|
@ -78,8 +81,8 @@ public class GATKBAMIndex {
|
||||||
*/
|
*/
|
||||||
private final long[] sequenceStartCache;
|
private final long[] sequenceStartCache;
|
||||||
|
|
||||||
private FileInputStream fileStream;
|
private SeekableFileStream fileStream;
|
||||||
private FileChannel fileChannel;
|
private SeekableBufferedStream bufferedStream;
|
||||||
|
|
||||||
public GATKBAMIndex(final File file) {
|
public GATKBAMIndex(final File file) {
|
||||||
mFile = file;
|
mFile = file;
|
||||||
|
|
@ -277,12 +280,11 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
||||||
sequenceStartCache[i] = position();
|
sequenceStartCache[i] = position();
|
||||||
|
|
||||||
// System.out.println("# Sequence TID: " + i);
|
// System.out.println("# Sequence TID: " + i);
|
||||||
final int nBins = readInteger();
|
final int nBins = readInteger();
|
||||||
// System.out.println("# nBins: " + nBins);
|
// System.out.println("# nBins: " + nBins);
|
||||||
for (int j = 0; j < nBins; j++) {
|
for (int j = 0; j < nBins; j++) {
|
||||||
skipInteger();
|
final int bin = readInteger();
|
||||||
final int nChunks = readInteger();
|
final int nChunks = readInteger();
|
||||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||||
skipBytes(16 * nChunks);
|
skipBytes(16 * nChunks);
|
||||||
|
|
@ -290,15 +292,18 @@ public class GATKBAMIndex {
|
||||||
final int nLinearBins = readInteger();
|
final int nLinearBins = readInteger();
|
||||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||||
skipBytes(8 * nLinearBins);
|
skipBytes(8 * nLinearBins);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sequenceStartCache[referenceSequence] = position();
|
sequenceStartCache[referenceSequence] = position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private void openIndexFile() {
|
private void openIndexFile() {
|
||||||
try {
|
try {
|
||||||
fileStream = new FileInputStream(mFile);
|
fileStream = new SeekableFileStream(mFile);
|
||||||
fileChannel = fileStream.getChannel();
|
bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
|
||||||
}
|
}
|
||||||
catch (IOException exc) {
|
catch (IOException exc) {
|
||||||
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
||||||
|
|
@ -307,7 +312,7 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private void closeIndexFile() {
|
private void closeIndexFile() {
|
||||||
try {
|
try {
|
||||||
fileChannel.close();
|
bufferedStream.close();
|
||||||
fileStream.close();
|
fileStream.close();
|
||||||
}
|
}
|
||||||
catch (IOException exc) {
|
catch (IOException exc) {
|
||||||
|
|
@ -334,10 +339,6 @@ public class GATKBAMIndex {
|
||||||
return buffer.getInt();
|
return buffer.getInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void skipInteger() {
|
|
||||||
skipBytes(INT_SIZE_IN_BYTES);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
||||||
* @param count Number of longs to read.
|
* @param count Number of longs to read.
|
||||||
|
|
@ -356,7 +357,9 @@ public class GATKBAMIndex {
|
||||||
private void read(final ByteBuffer buffer) {
|
private void read(final ByteBuffer buffer) {
|
||||||
try {
|
try {
|
||||||
int bytesExpected = buffer.limit();
|
int bytesExpected = buffer.limit();
|
||||||
int bytesRead = fileChannel.read(buffer);
|
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
|
||||||
|
//and put the result into the bytebuffer after the if statement.
|
||||||
|
int bytesRead = bufferedStream.read(byteArray,0,bytesExpected);
|
||||||
|
|
||||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||||
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
|
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
|
||||||
|
|
@ -367,6 +370,7 @@ public class GATKBAMIndex {
|
||||||
"Please try re-indexing the corresponding BAM file.",
|
"Please try re-indexing the corresponding BAM file.",
|
||||||
mFile));
|
mFile));
|
||||||
}
|
}
|
||||||
|
buffer.put(byteArray,0,bytesRead);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
||||||
|
|
@ -380,10 +384,13 @@ public class GATKBAMIndex {
|
||||||
*/
|
*/
|
||||||
private ByteBuffer buffer = null;
|
private ByteBuffer buffer = null;
|
||||||
|
|
||||||
|
//BufferedStream don't read into ByteBuffers, so we need this temporary array
|
||||||
|
private byte[] byteArray=null;
|
||||||
private ByteBuffer getBuffer(final int size) {
|
private ByteBuffer getBuffer(final int size) {
|
||||||
if(buffer == null || buffer.capacity() < size) {
|
if(buffer == null || buffer.capacity() < size) {
|
||||||
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
||||||
buffer = ByteBuffer.allocate(size);
|
buffer = ByteBuffer.allocate(size);
|
||||||
|
byteArray = new byte[size];
|
||||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
}
|
}
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
|
|
@ -393,7 +400,13 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private void skipBytes(final int count) {
|
private void skipBytes(final int count) {
|
||||||
try {
|
try {
|
||||||
fileChannel.position(fileChannel.position() + count);
|
|
||||||
|
//try to skip forward the requested amount.
|
||||||
|
long skipped = bufferedStream.skip(count);
|
||||||
|
|
||||||
|
if( skipped != count ) { //if not managed to skip the requested amount
|
||||||
|
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
||||||
|
|
@ -402,7 +415,8 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private void seek(final long position) {
|
private void seek(final long position) {
|
||||||
try {
|
try {
|
||||||
fileChannel.position(position);
|
//to seek a new position, move the fileChannel, and reposition the bufferedStream
|
||||||
|
bufferedStream.seek(position);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
|
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
|
||||||
|
|
@ -415,7 +429,7 @@ public class GATKBAMIndex {
|
||||||
*/
|
*/
|
||||||
private long position() {
|
private long position() {
|
||||||
try {
|
try {
|
||||||
return fileChannel.position();
|
return bufferedStream.position();
|
||||||
}
|
}
|
||||||
catch (IOException exc) {
|
catch (IOException exc) {
|
||||||
throw new ReviewedStingException("Unable to read position from index file " + mFile, exc);
|
throw new ReviewedStingException("Unable to read position from index file " + mFile, exc);
|
||||||
|
|
|
||||||
|
|
@ -33,14 +33,13 @@ import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider
|
||||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
|
import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
|
||||||
|
import org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction;
|
||||||
import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
|
import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
|
||||||
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
|
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.LinkedList;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A nano-scheduling version of TraverseReads.
|
* A nano-scheduling version of TraverseReads.
|
||||||
|
|
@ -60,6 +59,13 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
||||||
|
|
||||||
public TraverseReadsNano(int nThreads) {
|
public TraverseReadsNano(int nThreads) {
|
||||||
nanoScheduler = new NanoScheduler<MapData, MapResult, T>(nThreads);
|
nanoScheduler = new NanoScheduler<MapData, MapResult, T>(nThreads);
|
||||||
|
nanoScheduler.setProgressFunction(new NSProgressFunction<MapData>() {
|
||||||
|
@Override
|
||||||
|
public void progress(MapData lastProcessedMap) {
|
||||||
|
if ( lastProcessedMap.refContext != null )
|
||||||
|
printProgress(lastProcessedMap.refContext.getLocus());
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -78,6 +84,7 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
||||||
public T traverse(ReadWalker<M,T> walker,
|
public T traverse(ReadWalker<M,T> walker,
|
||||||
ReadShardDataProvider dataProvider,
|
ReadShardDataProvider dataProvider,
|
||||||
T sum) {
|
T sum) {
|
||||||
|
if ( logger.isDebugEnabled() )
|
||||||
logger.debug(String.format("TraverseReadsNano.traverse Covered dataset is %s", dataProvider));
|
logger.debug(String.format("TraverseReadsNano.traverse Covered dataset is %s", dataProvider));
|
||||||
|
|
||||||
if( !dataProvider.hasReads() )
|
if( !dataProvider.hasReads() )
|
||||||
|
|
@ -87,14 +94,10 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
||||||
final TraverseReadsMap myMap = new TraverseReadsMap(walker);
|
final TraverseReadsMap myMap = new TraverseReadsMap(walker);
|
||||||
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
|
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
|
||||||
|
|
||||||
final List<MapData> aggregatedInputs = aggregateMapData(dataProvider);
|
final Iterator<MapData> aggregatedInputs = aggregateMapData(dataProvider);
|
||||||
final T result = nanoScheduler.execute(aggregatedInputs.iterator(), myMap, sum, myReduce);
|
final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce);
|
||||||
|
|
||||||
final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
|
|
||||||
final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
|
|
||||||
|
|
||||||
updateCumulativeMetrics(dataProvider.getShard());
|
updateCumulativeMetrics(dataProvider.getShard());
|
||||||
printProgress(locus);
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
@ -107,13 +110,18 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
||||||
* @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce
|
* @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce
|
||||||
* should execute
|
* should execute
|
||||||
*/
|
*/
|
||||||
private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
|
private Iterator<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
|
||||||
|
return new Iterator<MapData>() {
|
||||||
final ReadView reads = new ReadView(dataProvider);
|
final ReadView reads = new ReadView(dataProvider);
|
||||||
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||||
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||||
|
final Iterator<SAMRecord> readIterator = reads.iterator();
|
||||||
|
|
||||||
final List<MapData> mapData = new LinkedList<MapData>();
|
@Override public boolean hasNext() { return readIterator.hasNext(); }
|
||||||
for ( final SAMRecord read : reads ) {
|
|
||||||
|
@Override
|
||||||
|
public MapData next() {
|
||||||
|
final SAMRecord read = readIterator.next();
|
||||||
final ReferenceContext refContext = ! read.getReadUnmappedFlag()
|
final ReferenceContext refContext = ! read.getReadUnmappedFlag()
|
||||||
? reference.getReferenceContext(read)
|
? reference.getReferenceContext(read)
|
||||||
: null;
|
: null;
|
||||||
|
|
@ -126,10 +134,13 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
||||||
// update the number of reads we've seen
|
// update the number of reads we've seen
|
||||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||||
|
|
||||||
mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker));
|
return new MapData((GATKSAMRecord)read, refContext, tracker);
|
||||||
}
|
}
|
||||||
|
|
||||||
return mapData;
|
@Override public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Remove not supported");
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rough category of indel type (insertion, deletion, multi-allelic, other)
|
* Assigns a roughly correct category of the variant type (SNP, MNP, insertion, deletion, etc.)
|
||||||
*/
|
*/
|
||||||
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class VariantType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
|
|
@ -26,41 +26,35 @@ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnota
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
int run;
|
StringBuffer type = new StringBuffer("");
|
||||||
if (vc.isMixed()) {
|
if ( vc.isVariant() && !vc.isBiallelic() )
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
type.append("MULTIALLELIC_");
|
||||||
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
|
|
||||||
return map;
|
|
||||||
|
|
||||||
}
|
if ( !vc.isIndel() ) {
|
||||||
else if ( vc.isIndel() ) {
|
type.append(vc.getType().toString());
|
||||||
String type="";
|
} else {
|
||||||
if (!vc.isBiallelic())
|
|
||||||
type = "MULTIALLELIC_INDEL";
|
|
||||||
else {
|
|
||||||
if (vc.isSimpleInsertion())
|
if (vc.isSimpleInsertion())
|
||||||
type = "INS.";
|
type.append("INSERTION.");
|
||||||
else if (vc.isSimpleDeletion())
|
else if (vc.isSimpleDeletion())
|
||||||
type = "DEL.";
|
type.append("DELETION.");
|
||||||
else
|
else
|
||||||
type = "OTHER.";
|
type.append("COMPLEX.");
|
||||||
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
|
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
|
||||||
for (int k : inds) {
|
type.append(IndelUtils.getIndelClassificationName(inds.get(0)));
|
||||||
type = type+ IndelUtils.getIndelClassificationName(k)+".";
|
|
||||||
|
for (int i = 1; i < inds.size(); i++ ) {
|
||||||
|
type.append(".");
|
||||||
|
type.append(IndelUtils.getIndelClassificationName(inds.get(i)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
map.put(getKeyNames().get(0), String.format("%s", type));
|
map.put(getKeyNames().get(0), String.format("%s", type));
|
||||||
return map;
|
return map;
|
||||||
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
public List<String> getKeyNames() { return Arrays.asList("VariantType"); }
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("IndelType"); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("VariantType", 1, VCFHeaderLineType.String, "Variant type description")); }
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("IndelType", 1, VCFHeaderLineType.String, "Indel type description")); }
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -45,7 +45,6 @@ import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.recalibration.*;
|
import org.broadinstitute.sting.utils.recalibration.*;
|
||||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||||
|
|
@ -53,7 +52,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
|
|
@ -194,14 +192,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
||||||
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
|
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
|
||||||
|
|
||||||
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
|
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
|
||||||
|
referenceReader = getToolkit().getReferenceDataSource().getReference();
|
||||||
try {
|
|
||||||
// fasta reference reader for use with BAQ calculation
|
|
||||||
referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
|
||||||
} catch( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private RecalibrationEngine initializeRecalibrationEngine() {
|
private RecalibrationEngine initializeRecalibrationEngine() {
|
||||||
|
|
@ -425,6 +416,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] calculateBAQArray( final GATKSAMRecord read ) {
|
private byte[] calculateBAQArray( final GATKSAMRecord read ) {
|
||||||
|
// todo -- it would be good to directly use the BAQ qualities rather than encoding and decoding the result and using the special @ value
|
||||||
baq.baqRead(read, referenceReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
|
baq.baqRead(read, referenceReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
|
||||||
return BAQ.getBAQTag(read);
|
return BAQ.getBAQTag(read);
|
||||||
}
|
}
|
||||||
|
|
@ -452,6 +444,8 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onTraversalDone(Long result) {
|
public void onTraversalDone(Long result) {
|
||||||
|
recalibrationEngine.finalizeData();
|
||||||
|
|
||||||
logger.info("Calculating quantized quality scores...");
|
logger.info("Calculating quantized quality scores...");
|
||||||
quantizeQualityScores();
|
quantizeQualityScores();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -34,4 +33,6 @@ public interface RecalibrationEngine {
|
||||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
|
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
|
||||||
|
|
||||||
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
|
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
|
||||||
|
|
||||||
|
public void finalizeData();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,15 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
|
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
|
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
|
||||||
|
|
@ -58,8 +56,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
||||||
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
|
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
|
||||||
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
|
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
|
||||||
|
|
||||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
|
||||||
|
|
||||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||||
|
|
||||||
for (int i = 2; i < covariates.length; i++) {
|
for (int i = 2; i < covariates.length; i++) {
|
||||||
|
|
@ -93,6 +89,34 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
||||||
return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE);
|
return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create derived recalibration data tables
|
||||||
|
*
|
||||||
|
* Assumes that all of the principal tables (by quality score) have been completely updated,
|
||||||
|
* and walks over this data to create summary data tables like by read group table.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void finalizeData() {
|
||||||
|
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
|
||||||
|
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
|
||||||
|
|
||||||
|
// iterate over all values in the qual table
|
||||||
|
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
|
||||||
|
final int rgKey = leaf.keys[0];
|
||||||
|
final int eventIndex = leaf.keys[2];
|
||||||
|
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
|
||||||
|
final RecalDatum qualDatum = leaf.value;
|
||||||
|
|
||||||
|
if ( rgDatum == null ) {
|
||||||
|
// create a copy of qualDatum, and initialize byReadGroup table with it
|
||||||
|
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
|
||||||
|
} else {
|
||||||
|
// combine the qual datum with the existing datum in the byReadGroup table
|
||||||
|
rgDatum.combine(qualDatum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||||
* if there isn't already one.
|
* if there isn't already one.
|
||||||
|
|
@ -121,34 +145,4 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
||||||
existingDatum.increment(1.0, isError);
|
existingDatum.increment(1.0, isError);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
|
|
||||||
* new item there if there isn't already one.
|
|
||||||
*
|
|
||||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
|
||||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
|
||||||
*
|
|
||||||
* @param table the table that holds/will hold our item
|
|
||||||
* @param qual qual for this event
|
|
||||||
* @param isError error value for this event
|
|
||||||
* @param keys location in table of our item
|
|
||||||
*/
|
|
||||||
protected void combineDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final double isError, final int... keys ) {
|
|
||||||
final RecalDatum existingDatum = table.get(keys);
|
|
||||||
final RecalDatum newDatum = createDatumObject(qual, isError);
|
|
||||||
|
|
||||||
if ( existingDatum == null ) {
|
|
||||||
// No existing item, try to put a new one
|
|
||||||
if ( ! table.put(newDatum, keys) ) {
|
|
||||||
// Failed to put a new item because another thread came along and put an item here first.
|
|
||||||
// Get the newly-put item and combine it with our item (item is guaranteed to exist at this point)
|
|
||||||
table.get(keys).combine(newDatum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Easy case: already an item here, so combine it with our item
|
|
||||||
existingDatum.combine(newDatum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -122,9 +122,9 @@ public class IndelUtils {
|
||||||
|
|
||||||
ArrayList<Integer> inds = new ArrayList<Integer>();
|
ArrayList<Integer> inds = new ArrayList<Integer>();
|
||||||
if ( vc.isSimpleInsertion() ) {
|
if ( vc.isSimpleInsertion() ) {
|
||||||
indelAlleleString = vc.getAlternateAllele(0).getDisplayString();
|
indelAlleleString = vc.getAlternateAllele(0).getDisplayString().substring(1);
|
||||||
} else if ( vc.isSimpleDeletion() ) {
|
} else if ( vc.isSimpleDeletion() ) {
|
||||||
indelAlleleString = vc.getReference().getDisplayString();
|
indelAlleleString = vc.getReference().getDisplayString().substring(1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
inds.add(IND_FOR_OTHER_EVENT);
|
inds.add(IND_FOR_OTHER_EVENT);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An LRU cache implemented as an extension to LinkedHashMap
|
||||||
|
*/
|
||||||
|
public class LRUCache<K,V> extends LinkedHashMap<K,V> {
|
||||||
|
private int capacity; // Maximum number of items in the cache.
|
||||||
|
|
||||||
|
public LRUCache(int capacity) {
|
||||||
|
super(capacity+1, 1.0f, true); // Pass 'true' for accessOrder.
|
||||||
|
this.capacity = capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean removeEldestEntry(final Map.Entry entry) {
|
||||||
|
return (size() > this.capacity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -6,6 +6,7 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -37,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
state[i] being wrong.
|
state[i] being wrong.
|
||||||
*/
|
*/
|
||||||
public class BAQ {
|
public class BAQ {
|
||||||
|
private final static Logger logger = Logger.getLogger(BAQ.class);
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
|
|
||||||
public enum CalculationMode {
|
public enum CalculationMode {
|
||||||
|
|
@ -179,8 +181,7 @@ public class BAQ {
|
||||||
|
|
||||||
/*** initialization ***/
|
/*** initialization ***/
|
||||||
// change coordinates
|
// change coordinates
|
||||||
int l_ref = ref.length;
|
final int l_ref = ref.length;
|
||||||
|
|
||||||
|
|
||||||
// set band width
|
// set band width
|
||||||
int bw2, bw = l_ref > l_query? l_ref : l_query;
|
int bw2, bw = l_ref > l_query? l_ref : l_query;
|
||||||
|
|
@ -266,26 +267,6 @@ public class BAQ {
|
||||||
s[l_query+1] = sum; // the last scaling factor
|
s[l_query+1] = sum; // the last scaling factor
|
||||||
}
|
}
|
||||||
|
|
||||||
//gdbebug+
|
|
||||||
/*
|
|
||||||
double cac=0.;
|
|
||||||
// undo scaling of forward probabilities to obtain plain probability of observation given model
|
|
||||||
double[] su = new double[f[l_query].length];
|
|
||||||
{
|
|
||||||
double sum = 0.;
|
|
||||||
double[] logs = new double[s.length];
|
|
||||||
for (k=0; k < logs.length; k++) {
|
|
||||||
logs[k] = Math.log10(s[k]);
|
|
||||||
sum += logs[k];
|
|
||||||
}
|
|
||||||
for (k=0; k < f[l_query].length; k++)
|
|
||||||
su[k]= Math.log10(f[l_query][k])+ sum;
|
|
||||||
|
|
||||||
cac = MathUtils.softMax(su);
|
|
||||||
}
|
|
||||||
System.out.format("s:%f\n",cac);
|
|
||||||
// gdebug-
|
|
||||||
*/
|
|
||||||
/*** backward ***/
|
/*** backward ***/
|
||||||
// b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
|
// b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
|
||||||
for (k = 1; k <= l_ref; ++k) {
|
for (k = 1; k <= l_ref; ++k) {
|
||||||
|
|
@ -305,8 +286,8 @@ public class BAQ {
|
||||||
for (k = end; k >= beg; --k) {
|
for (k = end; k >= beg; --k) {
|
||||||
int u, v11, v01, v10;
|
int u, v11, v01, v10;
|
||||||
u = set_u(bw, i, k); v11 = set_u(bw, i+1, k+1); v10 = set_u(bw, i+1, k); v01 = set_u(bw, i, k+1);
|
u = set_u(bw, i, k); v11 = set_u(bw, i+1, k+1); v10 = set_u(bw, i+1, k); v01 = set_u(bw, i, k+1);
|
||||||
double e = (k >= l_ref? 0 : calcEpsilon(ref[k], qyi1, _iqual[qstart+i])) * bi1[v11];
|
final double e = (k >= l_ref? 0 : calcEpsilon(ref[k], qyi1, _iqual[qstart+i])) * bi1[v11];
|
||||||
bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e.
|
bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been folded into e.
|
||||||
bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
|
bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
|
||||||
bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
|
bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
|
||||||
}
|
}
|
||||||
|
|
@ -332,12 +313,12 @@ public class BAQ {
|
||||||
/*** MAP ***/
|
/*** MAP ***/
|
||||||
for (i = 1; i <= l_query; ++i) {
|
for (i = 1; i <= l_query; ++i) {
|
||||||
double sum = 0., max = 0.;
|
double sum = 0., max = 0.;
|
||||||
double[] fi = f[i], bi = b[i];
|
final double[] fi = f[i], bi = b[i];
|
||||||
int beg = 1, end = l_ref, x, max_k = -1;
|
int beg = 1, end = l_ref, x, max_k = -1;
|
||||||
x = i - bw; beg = beg > x? beg : x;
|
x = i - bw; beg = beg > x? beg : x;
|
||||||
x = i + bw; end = end < x? end : x;
|
x = i + bw; end = end < x? end : x;
|
||||||
for (k = beg; k <= end; ++k) {
|
for (k = beg; k <= end; ++k) {
|
||||||
int u = set_u(bw, i, k);
|
final int u = set_u(bw, i, k);
|
||||||
double z;
|
double z;
|
||||||
sum += (z = fi[u+0] * bi[u+0]); if (z > max) { max = z; max_k = (k-1)<<2 | 0; }
|
sum += (z = fi[u+0] * bi[u+0]); if (z > max) { max = z; max_k = (k-1)<<2 | 0; }
|
||||||
sum += (z = fi[u+1] * bi[u+1]); if (z > max) { max = z; max_k = (k-1)<<2 | 1; }
|
sum += (z = fi[u+1] * bi[u+1]); if (z > max) { max = z; max_k = (k-1)<<2 | 1; }
|
||||||
|
|
@ -531,7 +512,11 @@ public class BAQ {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// final SimpleTimer total = new SimpleTimer();
|
||||||
|
// final SimpleTimer local = new SimpleTimer();
|
||||||
|
// int n = 0;
|
||||||
public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) {
|
public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) {
|
||||||
|
// total.restart();
|
||||||
if ( queryStart < 0 ) throw new ReviewedStingException("BUG: queryStart < 0: " + queryStart);
|
if ( queryStart < 0 ) throw new ReviewedStingException("BUG: queryStart < 0: " + queryStart);
|
||||||
if ( queryEnd < 0 ) throw new ReviewedStingException("BUG: queryEnd < 0: " + queryEnd);
|
if ( queryEnd < 0 ) throw new ReviewedStingException("BUG: queryEnd < 0: " + queryEnd);
|
||||||
if ( queryEnd < queryStart ) throw new ReviewedStingException("BUG: queryStart < queryEnd : " + queryStart + " end =" + queryEnd);
|
if ( queryEnd < queryStart ) throw new ReviewedStingException("BUG: queryStart < queryEnd : " + queryStart + " end =" + queryEnd);
|
||||||
|
|
@ -539,7 +524,12 @@ public class BAQ {
|
||||||
// note -- assumes ref is offset from the *CLIPPED* start
|
// note -- assumes ref is offset from the *CLIPPED* start
|
||||||
BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref);
|
BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref);
|
||||||
int queryLen = queryEnd - queryStart;
|
int queryLen = queryEnd - queryStart;
|
||||||
|
// local.restart();
|
||||||
hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
|
hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
|
||||||
|
// local.stop();
|
||||||
|
// total.stop();
|
||||||
|
// if ( n++ % 100000 == 0 )
|
||||||
|
// logger.info("n = " + n + ": Total " + total.getElapsedTimeNano() + " local " + local.getElapsedTimeNano());
|
||||||
return baqResult;
|
return baqResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,13 +58,20 @@ public class NestedIntegerArray<T> {
|
||||||
|
|
||||||
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
|
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
|
||||||
|
|
||||||
logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
|
if ( logger.isDebugEnabled() ) logger.debug(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
|
||||||
logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
|
if ( logger.isDebugEnabled() ) logger.debug(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||||
|
|
||||||
data = new Object[dimensions[0]];
|
data = new Object[dimensions[0]];
|
||||||
preallocateArray(data, 0, dimensionsToPreallocate);
|
preallocateArray(data, 0, dimensionsToPreallocate);
|
||||||
|
|
||||||
logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
|
if ( logger.isDebugEnabled() ) logger.debug(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the dimensions of this nested integer array. DO NOT MODIFY
|
||||||
|
*/
|
||||||
|
public int[] getDimensions() {
|
||||||
|
return dimensions;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -174,23 +181,23 @@ public class NestedIntegerArray<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Leaf {
|
public static class Leaf<T> {
|
||||||
public final int[] keys;
|
public final int[] keys;
|
||||||
public final Object value;
|
public final T value;
|
||||||
|
|
||||||
public Leaf(final int[] keys, final Object value) {
|
public Leaf(final int[] keys, final T value) {
|
||||||
this.keys = keys;
|
this.keys = keys;
|
||||||
this.value = value;
|
this.value = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Leaf> getAllLeaves() {
|
public List<Leaf<T>> getAllLeaves() {
|
||||||
final List<Leaf> result = new ArrayList<Leaf>();
|
final List<Leaf<T>> result = new ArrayList<Leaf<T>>();
|
||||||
fillAllLeaves(data, new int[0], result);
|
fillAllLeaves(data, new int[0], result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fillAllLeaves(final Object[] array, final int[] path, final List<Leaf> result) {
|
private void fillAllLeaves(final Object[] array, final int[] path, final List<Leaf<T>> result) {
|
||||||
for ( int key = 0; key < array.length; key++ ) {
|
for ( int key = 0; key < array.length; key++ ) {
|
||||||
final Object value = array[key];
|
final Object value = array[key];
|
||||||
if ( value == null )
|
if ( value == null )
|
||||||
|
|
@ -199,7 +206,7 @@ public class NestedIntegerArray<T> {
|
||||||
if ( value instanceof Object[] ) {
|
if ( value instanceof Object[] ) {
|
||||||
fillAllLeaves((Object[]) value, newPath, result);
|
fillAllLeaves((Object[]) value, newPath, result);
|
||||||
} else {
|
} else {
|
||||||
result.add(new Leaf(newPath, value));
|
result.add(new Leaf<T>(newPath, (T)value));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -145,7 +145,7 @@ public class ProgressMeter {
|
||||||
private final SimpleTimer timer = new SimpleTimer();
|
private final SimpleTimer timer = new SimpleTimer();
|
||||||
|
|
||||||
private GenomeLoc maxGenomeLoc = null;
|
private GenomeLoc maxGenomeLoc = null;
|
||||||
private String positionMessage = "starting";
|
private Position position = new Position(PositionStatus.STARTING);
|
||||||
private long nTotalRecordsProcessed = 0;
|
private long nTotalRecordsProcessed = 0;
|
||||||
|
|
||||||
final ProgressMeterDaemon progressMeterDaemon;
|
final ProgressMeterDaemon progressMeterDaemon;
|
||||||
|
|
@ -234,9 +234,65 @@ public class ProgressMeter {
|
||||||
this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);
|
this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);
|
||||||
|
|
||||||
// a pretty name for our position
|
// a pretty name for our position
|
||||||
this.positionMessage = maxGenomeLoc == null
|
this.position = maxGenomeLoc == null ? new Position(PositionStatus.IN_UNMAPPED_READS) : new Position(maxGenomeLoc);
|
||||||
? "unmapped reads"
|
}
|
||||||
: String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart());
|
|
||||||
|
/**
|
||||||
|
* Describes the status of this position marker, such as starting up, done, in the unmapped reads,
|
||||||
|
* or somewhere on the genome
|
||||||
|
*/
|
||||||
|
private enum PositionStatus {
|
||||||
|
STARTING("Starting"),
|
||||||
|
DONE("done"),
|
||||||
|
IN_UNMAPPED_READS("unmapped reads"),
|
||||||
|
ON_GENOME(null);
|
||||||
|
|
||||||
|
public final String message;
|
||||||
|
|
||||||
|
private PositionStatus(String message) {
|
||||||
|
this.message = message;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A pair of position status and the genome loc, if necessary. Used to get a
|
||||||
|
* status update message as needed, without the computational cost of formatting
|
||||||
|
* the genome loc string every time a progress notification happens (which is almost
|
||||||
|
* always not printed)
|
||||||
|
*/
|
||||||
|
private class Position {
|
||||||
|
final PositionStatus type;
|
||||||
|
final GenomeLoc maybeLoc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a position object of any type != ON_GENOME
|
||||||
|
* @param type
|
||||||
|
*/
|
||||||
|
@Requires({"type != null", "type != PositionStatus.ON_GENOME"})
|
||||||
|
private Position(PositionStatus type) {
|
||||||
|
this.type = type;
|
||||||
|
this.maybeLoc = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a position object of type ON_GENOME at genomeloc loc
|
||||||
|
* @param loc
|
||||||
|
*/
|
||||||
|
@Requires("loc != null")
|
||||||
|
private Position(GenomeLoc loc) {
|
||||||
|
this.type = PositionStatus.ON_GENOME;
|
||||||
|
this.maybeLoc = loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return a human-readable representation of this position
|
||||||
|
*/
|
||||||
|
private String getMessage() {
|
||||||
|
if ( type == PositionStatus.ON_GENOME )
|
||||||
|
return maxGenomeLoc.getContig() + ":" + maxGenomeLoc.getStart();
|
||||||
|
else
|
||||||
|
return type.message;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -267,7 +323,7 @@ public class ProgressMeter {
|
||||||
updateLoggerPrintFrequency(estTotalRuntime.getTimeInSeconds());
|
updateLoggerPrintFrequency(estTotalRuntime.getTimeInSeconds());
|
||||||
|
|
||||||
logger.info(String.format("%15s %5.2e %s %s %5.1f%% %s %s",
|
logger.info(String.format("%15s %5.2e %s %s %5.1f%% %s %s",
|
||||||
positionMessage, progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
|
position.getMessage(), progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
|
||||||
100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
|
100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -317,7 +373,7 @@ public class ProgressMeter {
|
||||||
public void notifyDone(final long nTotalRecordsProcessed) {
|
public void notifyDone(final long nTotalRecordsProcessed) {
|
||||||
// print out the progress meter
|
// print out the progress meter
|
||||||
this.nTotalRecordsProcessed = nTotalRecordsProcessed;
|
this.nTotalRecordsProcessed = nTotalRecordsProcessed;
|
||||||
this.positionMessage = "done";
|
this.position = new Position(PositionStatus.DONE);
|
||||||
printProgress(true);
|
printProgress(true);
|
||||||
|
|
||||||
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours",
|
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours",
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMTag;
|
import net.sf.samtools.SAMTag;
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||||
|
|
@ -44,7 +45,8 @@ import java.io.File;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class BaseRecalibration {
|
public class BaseRecalibration {
|
||||||
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
private static Logger logger = Logger.getLogger(BaseRecalibration.class);
|
||||||
|
private final static boolean TEST_CACHING = false;
|
||||||
|
|
||||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||||
private final RecalibrationTables recalibrationTables;
|
private final RecalibrationTables recalibrationTables;
|
||||||
|
|
@ -54,12 +56,8 @@ public class BaseRecalibration {
|
||||||
private final int preserveQLessThan;
|
private final int preserveQLessThan;
|
||||||
private final boolean emitOriginalQuals;
|
private final boolean emitOriginalQuals;
|
||||||
|
|
||||||
// TODO -- was this supposed to be used somewhere?
|
private final NestedIntegerArray<Double> globalDeltaQs;
|
||||||
// private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
private final NestedIntegerArray<Double> deltaQReporteds;
|
||||||
// static {
|
|
||||||
// for (int i = 0; i < EventType.values().length; i++)
|
|
||||||
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -84,6 +82,44 @@ public class BaseRecalibration {
|
||||||
this.disableIndelQuals = disableIndelQuals;
|
this.disableIndelQuals = disableIndelQuals;
|
||||||
this.preserveQLessThan = preserveQLessThan;
|
this.preserveQLessThan = preserveQLessThan;
|
||||||
this.emitOriginalQuals = emitOriginalQuals;
|
this.emitOriginalQuals = emitOriginalQuals;
|
||||||
|
|
||||||
|
logger.info("Calculating cached tables...");
|
||||||
|
|
||||||
|
//
|
||||||
|
// Create a NestedIntegerArray<Double> that maps from rgKey x errorModel -> double,
|
||||||
|
// where the double is the result of this calculation. The entire calculation can
|
||||||
|
// be done upfront, on initialization of this BaseRecalibration structure
|
||||||
|
//
|
||||||
|
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
|
||||||
|
globalDeltaQs = new NestedIntegerArray<Double>( byReadGroupTable.getDimensions() );
|
||||||
|
logger.info("Calculating global delta Q table...");
|
||||||
|
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byReadGroupTable.getAllLeaves() ) {
|
||||||
|
final int rgKey = leaf.keys[0];
|
||||||
|
final int eventIndex = leaf.keys[1];
|
||||||
|
final double globalDeltaQ = calculateGlobalDeltaQ(rgKey, EventType.eventFrom(eventIndex));
|
||||||
|
globalDeltaQs.put(globalDeltaQ, rgKey, eventIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// The calculation of the deltaQ report is constant. key[0] and key[1] are the read group and qual, respectively
|
||||||
|
// and globalDeltaQ is a constant for the read group. So technically the delta Q reported is simply a lookup
|
||||||
|
// into a matrix indexed by rgGroup, qual, and event type.
|
||||||
|
// the code below actually creates this cache with a NestedIntegerArray calling into the actual
|
||||||
|
// calculateDeltaQReported code.
|
||||||
|
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
|
||||||
|
deltaQReporteds = new NestedIntegerArray<Double>( byQualTable.getDimensions() );
|
||||||
|
logger.info("Calculating delta Q reported table...");
|
||||||
|
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
|
||||||
|
final int rgKey = leaf.keys[0];
|
||||||
|
final int qual = leaf.keys[1];
|
||||||
|
final int eventIndex = leaf.keys[2];
|
||||||
|
final EventType event = EventType.eventFrom(eventIndex);
|
||||||
|
final double globalDeltaQ = getGlobalDeltaQ(rgKey, event);
|
||||||
|
final double deltaQReported = calculateDeltaQReported(rgKey, qual, event, globalDeltaQ, (byte)qual);
|
||||||
|
deltaQReporteds.put(deltaQReported, rgKey, qual, eventIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("done calculating cache");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -91,6 +127,18 @@ public class BaseRecalibration {
|
||||||
*
|
*
|
||||||
* It updates the base qualities of the read with the new recalibrated qualities (for all event types)
|
* It updates the base qualities of the read with the new recalibrated qualities (for all event types)
|
||||||
*
|
*
|
||||||
|
* Implements a serial recalibration of the reads using the combinational table.
|
||||||
|
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||||
|
*
|
||||||
|
* Given the full recalibration table, we perform the following preprocessing steps:
|
||||||
|
*
|
||||||
|
* - calculate the global quality score shift across all data [DeltaQ]
|
||||||
|
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
||||||
|
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
||||||
|
* - The final shift equation is:
|
||||||
|
*
|
||||||
|
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
||||||
|
*
|
||||||
* @param read the read to recalibrate
|
* @param read the read to recalibrate
|
||||||
*/
|
*/
|
||||||
public void recalibrateRead(final GATKSAMRecord read) {
|
public void recalibrateRead(final GATKSAMRecord read) {
|
||||||
|
|
@ -103,6 +151,7 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
|
|
||||||
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates);
|
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||||
|
final int readLength = read.getReadLength();
|
||||||
|
|
||||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||||
|
|
@ -111,58 +160,88 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
|
|
||||||
final byte[] quals = read.getBaseQualities(errorModel);
|
final byte[] quals = read.getBaseQualities(errorModel);
|
||||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
|
||||||
|
|
||||||
final int readLength = read.getReadLength();
|
// get the keyset for this base using the error model
|
||||||
|
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel);
|
||||||
|
|
||||||
|
// the rg key is constant over the whole read, the global deltaQ is too
|
||||||
|
final int rgKey = fullReadKeySet[0][0];
|
||||||
|
|
||||||
|
final double globalDeltaQ = getGlobalDeltaQ(rgKey, errorModel);
|
||||||
|
|
||||||
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
||||||
|
final byte origQual = quals[offset];
|
||||||
|
|
||||||
final byte originalQualityScore = quals[offset];
|
// only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||||
|
if ( origQual >= preserveQLessThan ) {
|
||||||
|
// get the keyset for this base using the error model
|
||||||
|
final int[] keySet = fullReadKeySet[offset];
|
||||||
|
final double deltaQReported = getDeltaQReported(keySet[0], keySet[1], errorModel, globalDeltaQ);
|
||||||
|
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, keySet, errorModel, globalDeltaQ, deltaQReported, origQual);
|
||||||
|
|
||||||
|
// calculate the recalibrated qual using the BQSR formula
|
||||||
|
double recalibratedQualDouble = origQual + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||||
|
|
||||||
|
// recalibrated quality is bound between 1 and MAX_QUAL
|
||||||
|
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
|
|
||||||
|
// return the quantized version of the recalibrated quality
|
||||||
|
final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);
|
||||||
|
|
||||||
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
|
||||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
|
||||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
|
||||||
quals[offset] = recalibratedQualityScore;
|
quals[offset] = recalibratedQualityScore;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// finally update the base qualities in the read
|
||||||
read.setBaseQualities(quals, errorModel);
|
read.setBaseQualities(quals, errorModel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) {
|
||||||
|
final Double cached = globalDeltaQs.get(rgKey, errorModel.index);
|
||||||
|
|
||||||
/**
|
if ( TEST_CACHING ) {
|
||||||
* Implements a serial recalibration of the reads using the combinational table.
|
final double calcd = calculateGlobalDeltaQ(rgKey, errorModel);
|
||||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
if ( calcd != cached )
|
||||||
*
|
throw new IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + errorModel);
|
||||||
* Given the full recalibration table, we perform the following preprocessing steps:
|
|
||||||
*
|
|
||||||
* - calculate the global quality score shift across all data [DeltaQ]
|
|
||||||
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
|
||||||
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
|
||||||
* - The final shift equation is:
|
|
||||||
*
|
|
||||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
|
||||||
*
|
|
||||||
* @param key The list of Comparables that were calculated from the covariates
|
|
||||||
* @param errorModel the event type
|
|
||||||
* @return A recalibrated quality score as a byte
|
|
||||||
*/
|
|
||||||
private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
|
||||||
|
|
||||||
final byte qualFromRead = (byte)(long)key[1];
|
|
||||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getReadGroupTable(), key, errorModel);
|
|
||||||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getQualityScoreTable(), key, errorModel, globalDeltaQ, qualFromRead);
|
|
||||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
|
||||||
|
|
||||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
|
||||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
|
||||||
|
|
||||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateGlobalDeltaQ(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel) {
|
return cachedWithDefault(cached);
|
||||||
|
}
|
||||||
|
|
||||||
|
private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) {
|
||||||
|
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index);
|
||||||
|
|
||||||
|
if ( TEST_CACHING ) {
|
||||||
|
final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey);
|
||||||
|
if ( calcd != cached )
|
||||||
|
throw new IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + qualKey + " / " + errorModel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return cachedWithDefault(cached);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param d a Double (that may be null) that is the result of a delta Q calculation
|
||||||
|
* @return a double == d if d != null, or 0.0 if it is
|
||||||
|
*/
|
||||||
|
private double cachedWithDefault(final Double d) {
|
||||||
|
return d == null ? 0.0 : d;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note that this calculation is a constant for each rgKey and errorModel. We need only
|
||||||
|
* compute this value once for all data.
|
||||||
|
*
|
||||||
|
* @param rgKey
|
||||||
|
* @param errorModel
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
|
||||||
double result = 0.0;
|
double result = 0.0;
|
||||||
|
|
||||||
final RecalDatum empiricalQualRG = table.get(key[0], errorModel.index);
|
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index);
|
||||||
|
|
||||||
if (empiricalQualRG != null) {
|
if (empiricalQualRG != null) {
|
||||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||||
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
||||||
|
|
@ -172,10 +251,10 @@ public class BaseRecalibration {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateDeltaQReported(final NestedIntegerArray<RecalDatum> table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
||||||
double result = 0.0;
|
double result = 0.0;
|
||||||
|
|
||||||
final RecalDatum empiricalQualQS = table.get(key[0], key[1], errorModel.index);
|
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index);
|
||||||
if (empiricalQualQS != null) {
|
if (empiricalQualQS != null) {
|
||||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||||
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||||
|
|
@ -192,12 +271,28 @@ public class BaseRecalibration {
|
||||||
if (key[i] < 0)
|
if (key[i] < 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final RecalDatum empiricalQualCO = recalibrationTables.getTable(i).get(key[0], key[1], key[i], errorModel.index);
|
result += calculateDeltaQCovariate(recalibrationTables.getTable(i),
|
||||||
if (empiricalQualCO != null) {
|
key[0], key[1], key[i], errorModel,
|
||||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
globalDeltaQ, deltaQReported, qualFromRead);
|
||||||
result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private double calculateDeltaQCovariate(final NestedIntegerArray<RecalDatum> table,
|
||||||
|
final int rgKey,
|
||||||
|
final int qualKey,
|
||||||
|
final int tableKey,
|
||||||
|
final EventType errorModel,
|
||||||
|
final double globalDeltaQ,
|
||||||
|
final double deltaQReported,
|
||||||
|
final byte qualFromRead) {
|
||||||
|
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index);
|
||||||
|
if (empiricalQualCO != null) {
|
||||||
|
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||||
|
return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported);
|
||||||
|
} else {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.utils.recalibration;
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.LRUCache;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The object temporarily held by a read that describes all of it's covariates.
|
* The object temporarily held by a read that describes all of it's covariates.
|
||||||
|
|
@ -11,12 +12,47 @@ import java.util.Arrays;
|
||||||
* @since 2/8/12
|
* @since 2/8/12
|
||||||
*/
|
*/
|
||||||
public class ReadCovariates {
|
public class ReadCovariates {
|
||||||
|
private final static Logger logger = Logger.getLogger(ReadCovariates.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How big should we let the LRU cache grow
|
||||||
|
*/
|
||||||
|
private static final int LRU_CACHE_SIZE = 500;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use an LRU cache to keep cache of keys (int[][][]) arrays for each read length we've seen.
|
||||||
|
* The cache allows us to avoid the expense of recreating these arrays for every read. The LRU
|
||||||
|
* keeps the total number of cached arrays to less than LRU_CACHE_SIZE.
|
||||||
|
*
|
||||||
|
* This is a thread local variable, so the total memory required may grow to N_THREADS x LRU_CACHE_SIZE
|
||||||
|
*/
|
||||||
|
private final static ThreadLocal<LRUCache<Integer, int[][][]>> keysCache = new ThreadLocal<LRUCache<Integer, int[][][]>>() {
|
||||||
|
@Override protected LRUCache<Integer, int[][][]> initialValue() {
|
||||||
|
return new LRUCache<Integer, int[][][]>(LRU_CACHE_SIZE);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Our keys, indexed by event type x read length x covariate
|
||||||
|
*/
|
||||||
private final int[][][] keys;
|
private final int[][][] keys;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The index of the current covariate, used by addCovariate
|
||||||
|
*/
|
||||||
private int currentCovariateIndex = 0;
|
private int currentCovariateIndex = 0;
|
||||||
|
|
||||||
public ReadCovariates(final int readLength, final int numberOfCovariates) {
|
public ReadCovariates(final int readLength, final int numberOfCovariates) {
|
||||||
|
final LRUCache<Integer, int[][][]> cache = keysCache.get();
|
||||||
|
final int[][][] cachedKeys = cache.get(readLength);
|
||||||
|
if ( cachedKeys == null ) {
|
||||||
|
// There's no cached value for read length so we need to create a new int[][][] array
|
||||||
|
if ( logger.isDebugEnabled() ) logger.debug("Keys cache miss for length " + readLength + " cache size " + cache.size());
|
||||||
keys = new int[EventType.values().length][readLength][numberOfCovariates];
|
keys = new int[EventType.values().length][readLength][numberOfCovariates];
|
||||||
|
cache.put(readLength, keys);
|
||||||
|
} else {
|
||||||
|
keys = cachedKeys;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCovariateIndex(final int index) {
|
public void setCovariateIndex(final int index) {
|
||||||
|
|
@ -24,22 +60,26 @@ public class ReadCovariates {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Necessary due to bug in BaseRecalibration recalibrateRead function. It is clearly seeing space it's not supposed to
|
* Update the keys for mismatch, insertion, and deletion for the current covariate at read offset
|
||||||
* @return
|
*
|
||||||
|
* @param mismatch the mismatch key value
|
||||||
|
* @param insertion the insertion key value
|
||||||
|
* @param deletion the deletion key value
|
||||||
|
* @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
|
||||||
*/
|
*/
|
||||||
public ReadCovariates clear() {
|
|
||||||
for ( int i = 0; i < keys.length; i++ )
|
|
||||||
for ( int j = 0; j < keys[i].length; j++)
|
|
||||||
Arrays.fill(keys[i][j], 0);
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||||
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
||||||
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
||||||
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
|
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the keys for all covariates at read position for error model
|
||||||
|
*
|
||||||
|
* @param readPosition
|
||||||
|
* @param errorModel
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
public int[] getKeySet(final int readPosition, final EventType errorModel) {
|
public int[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||||
return keys[errorModel.index][readPosition];
|
return keys[errorModel.index][readPosition];
|
||||||
}
|
}
|
||||||
|
|
@ -48,21 +88,12 @@ public class ReadCovariates {
|
||||||
return keys[errorModel.index];
|
return keys[errorModel.index];
|
||||||
}
|
}
|
||||||
|
|
||||||
public int[] getMismatchesKeySet(final int readPosition) {
|
// ----------------------------------------------------------------------
|
||||||
return keys[EventType.BASE_SUBSTITUTION.index][readPosition];
|
//
|
||||||
}
|
// routines for testing
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
public int[] getInsertionsKeySet(final int readPosition) {
|
|
||||||
return keys[EventType.BASE_INSERTION.index][readPosition];
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[] getDeletionsKeySet(final int readPosition) {
|
|
||||||
return keys[EventType.BASE_DELETION.index][readPosition];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Testing routines
|
|
||||||
*/
|
|
||||||
protected int[][] getMismatchesKeySet() {
|
protected int[][] getMismatchesKeySet() {
|
||||||
return keys[EventType.BASE_SUBSTITUTION.index];
|
return keys[EventType.BASE_SUBSTITUTION.index];
|
||||||
}
|
}
|
||||||
|
|
@ -74,4 +105,16 @@ public class ReadCovariates {
|
||||||
protected int[][] getDeletionsKeySet() {
|
protected int[][] getDeletionsKeySet() {
|
||||||
return keys[EventType.BASE_DELETION.index];
|
return keys[EventType.BASE_DELETION.index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected int[] getMismatchesKeySet(final int readPosition) {
|
||||||
|
return getKeySet(readPosition, EventType.BASE_SUBSTITUTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int[] getInsertionsKeySet(final int readPosition) {
|
||||||
|
return getKeySet(readPosition, EventType.BASE_INSERTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int[] getDeletionsKeySet(final int readPosition) {
|
||||||
|
return getKeySet(readPosition, EventType.BASE_DELETION);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -193,7 +193,7 @@ public class RecalDatum {
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// increment methods
|
// toString methods
|
||||||
//
|
//
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
@ -206,22 +206,6 @@ public class RecalDatum {
|
||||||
return String.format("%s,%.2f,%.2f", toString(), getEstimatedQReported(), getEmpiricalQuality() - getEstimatedQReported());
|
return String.format("%s,%.2f,%.2f", toString(), getEstimatedQReported(), getEmpiricalQuality() - getEstimatedQReported());
|
||||||
}
|
}
|
||||||
|
|
||||||
// /**
|
|
||||||
// * We don't compare the estimated quality reported because it may be different when read from
|
|
||||||
// * report tables.
|
|
||||||
// *
|
|
||||||
// * @param o the other recal datum
|
|
||||||
// * @return true if the two recal datums have the same number of observations, errors and empirical quality.
|
|
||||||
// */
|
|
||||||
// @Override
|
|
||||||
// public boolean equals(Object o) {
|
|
||||||
// if (!(o instanceof RecalDatum))
|
|
||||||
// return false;
|
|
||||||
// RecalDatum other = (RecalDatum) o;
|
|
||||||
// return super.equals(o) &&
|
|
||||||
// MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
|
||||||
// }
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// increment methods
|
// increment methods
|
||||||
|
|
@ -264,15 +248,14 @@ public class RecalDatum {
|
||||||
@Requires({"incObservations >= 0", "incMismatches >= 0"})
|
@Requires({"incObservations >= 0", "incMismatches >= 0"})
|
||||||
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
|
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
|
||||||
public synchronized void increment(final double incObservations, final double incMismatches) {
|
public synchronized void increment(final double incObservations, final double incMismatches) {
|
||||||
incrementNumObservations(incObservations);
|
numObservations += incObservations;
|
||||||
incrementNumMismatches(incMismatches);
|
numMismatches += incMismatches;
|
||||||
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
|
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
|
||||||
public synchronized void increment(final boolean isError) {
|
public synchronized void increment(final boolean isError) {
|
||||||
incrementNumObservations(1);
|
increment(1, isError ? 1 : 0.0);
|
||||||
if ( isError )
|
|
||||||
incrementNumMismatches(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
@ -286,7 +269,7 @@ public class RecalDatum {
|
||||||
*/
|
*/
|
||||||
@Requires("empiricalQuality == UNINITIALIZED")
|
@Requires("empiricalQuality == UNINITIALIZED")
|
||||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||||
private synchronized final void calcEmpiricalQuality() {
|
private synchronized void calcEmpiricalQuality() {
|
||||||
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
||||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,13 +26,13 @@
|
||||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -99,9 +99,21 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
final ArrayList<Integer> indelKeys = contextWith(bases, indelsContextSize, indelsKeyMask);
|
final ArrayList<Integer> indelKeys = contextWith(bases, indelsContextSize, indelsKeyMask);
|
||||||
|
|
||||||
final int readLength = bases.length;
|
final int readLength = bases.length;
|
||||||
|
|
||||||
|
// this is necessary to ensure that we don't keep historical data in the ReadCovariates values
|
||||||
|
// since the context covariate may not span the entire set of values in read covariates
|
||||||
|
// due to the clipping of the low quality bases
|
||||||
|
if ( readLength != originalBases.length ) {
|
||||||
|
// don't both zeroing out if we are going to overwrite the whole array
|
||||||
|
for ( int i = 0; i < originalBases.length; i++ )
|
||||||
|
// this base has been clipped off, so zero out the covariate values here
|
||||||
|
values.addCovariate(0, 0, 0, i);
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < readLength; i++) {
|
for (int i = 0; i < readLength; i++) {
|
||||||
|
final int readOffset = (negativeStrand ? readLength - i - 1 : i);
|
||||||
final int indelKey = indelKeys.get(i);
|
final int indelKey = indelKeys.get(i);
|
||||||
values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i));
|
values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, readOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// put the original bases back in
|
// put the original bases back in
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,13 @@
|
||||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -77,6 +79,14 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
return keyForReadGroup((String) value);
|
return keyForReadGroup((String) value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the mapping from read group names to integer key values for all read groups in this covariate
|
||||||
|
* @return a set of mappings from read group names -> integer key values
|
||||||
|
*/
|
||||||
|
public Set<Map.Entry<String, Integer>> getKeyMap() {
|
||||||
|
return readGroupLookupTable.entrySet();
|
||||||
|
}
|
||||||
|
|
||||||
private int keyForReadGroup(final String readGroupId) {
|
private int keyForReadGroup(final String readGroupId) {
|
||||||
// Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR),
|
// Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR),
|
||||||
// synchronize only the table updates.
|
// synchronize only the table updates.
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,9 @@ import org.testng.annotations.Test;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class PileupWalkerIntegrationTest extends WalkerTest {
|
public class PileupWalkerIntegrationTest extends WalkerTest {
|
||||||
|
String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam "
|
||||||
|
+ "-R " + hg19Reference + " -o %s ";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGnarleyFHSPileup() {
|
public void testGnarleyFHSPileup() {
|
||||||
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
||||||
|
|
@ -39,4 +42,31 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
|
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
|
||||||
executeTest("Testing single read spanning off chromosome 1 unindexed", spec);
|
executeTest("Testing single read spanning off chromosome 1 unindexed", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/************************/
|
||||||
|
|
||||||
|
//testing speedup to GATKBAMIndex
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBamChr20(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, Arrays.asList("8702701350de11a6d28204acefdc4775"));
|
||||||
|
executeTest("Testing single on big BAM at start of chromosome 20", spec);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBamMid20(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3"));
|
||||||
|
executeTest("Testing single on big BAM somewhere in chromosome 20", spec);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBamEnd20(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63"));
|
||||||
|
executeTest("Testing single at end of chromosome 20", spec);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBam20Many(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ",
|
||||||
|
1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1"));
|
||||||
|
executeTest("Testing single on big BAM many places", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
var exampleFASTA: Reference = _
|
var exampleFASTA: Reference = _
|
||||||
var refs: List[Reference] = _
|
var refs: List[Reference] = _
|
||||||
|
|
||||||
class Resource(val file: File, val name: String, val ref: Reference, val useName: Boolean = true, val makeSites: Boolean = true ) {
|
class Resource(val file: File, val name: String, val ref: Reference, val useName: Boolean = true, val makeSites: Boolean = true, val makeCallsIfBam: Boolean = true ) {
|
||||||
def destname(target: Reference): String = {
|
def destname(target: Reference): String = {
|
||||||
if ( useName )
|
if ( useName )
|
||||||
return name + "." + target.name + "." + getExtension(file)
|
return name + "." + target.name + "." + getExtension(file)
|
||||||
|
|
@ -68,6 +68,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
|
|
||||||
def isVCF(file: File) = file.getName.endsWith(".vcf")
|
def isVCF(file: File) = file.getName.endsWith(".vcf")
|
||||||
def isBAM(file: File) = file.getName.endsWith(".bam")
|
def isBAM(file: File) = file.getName.endsWith(".bam")
|
||||||
|
def isOUT(file: File) = file.getName.endsWith(".out")
|
||||||
def isFASTA(file: File) = file.getName.endsWith(".fasta")
|
def isFASTA(file: File) = file.getName.endsWith(".fasta")
|
||||||
|
|
||||||
var RESOURCES: List[Resource] = Nil
|
var RESOURCES: List[Resource] = Nil
|
||||||
|
|
@ -94,7 +95,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false))
|
addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false))
|
||||||
|
|
||||||
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
||||||
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
|
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false, false, false))
|
||||||
}
|
}
|
||||||
|
|
||||||
def initializeStandardDataFiles() = {
|
def initializeStandardDataFiles() = {
|
||||||
|
|
@ -172,7 +173,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
// exampleFASTA file
|
// exampleFASTA file
|
||||||
//
|
//
|
||||||
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
||||||
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
|
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false, false, false))
|
||||||
}
|
}
|
||||||
|
|
||||||
def createBundleDirectories(dir: File) = {
|
def createBundleDirectories(dir: File) = {
|
||||||
|
|
@ -184,6 +185,15 @@ class GATKResourcesBundle extends QScript {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def createCurrentLink(bundleDir: File) = {
|
||||||
|
|
||||||
|
val currentLink = new File(BUNDLE_ROOT + "/current")
|
||||||
|
|
||||||
|
if ( currentLink.exists ) currentLink.delete()
|
||||||
|
|
||||||
|
add(new linkFile(bundleDir, currentLink))
|
||||||
|
}
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
if ( TEST )
|
if ( TEST )
|
||||||
initializeTestDataFiles();
|
initializeTestDataFiles();
|
||||||
|
|
@ -201,8 +211,10 @@ class GATKResourcesBundle extends QScript {
|
||||||
} else if ( isBAM(resource.file) ) {
|
} else if ( isBAM(resource.file) ) {
|
||||||
val f = copyBundleFile(resource, resource.ref)
|
val f = copyBundleFile(resource, resource.ref)
|
||||||
add(new IndexBAM(f))
|
add(new IndexBAM(f))
|
||||||
|
if ( resource.makeCallsIfBam ) {
|
||||||
@Output val outvcf: File = swapExt(f.getParent, f, ".bam", ".vcf")
|
@Output val outvcf: File = swapExt(f.getParent, f, ".bam", ".vcf")
|
||||||
add(new UG(resource.file, resource.ref.file, outvcf))
|
add(new UG(resource.file, resource.ref.file, outvcf))
|
||||||
|
}
|
||||||
} else if ( isVCF(resource.file) ) {
|
} else if ( isVCF(resource.file) ) {
|
||||||
for ( destRef <- refs ) {
|
for ( destRef <- refs ) {
|
||||||
val out = destFile(BUNDLE_DIR, destRef, resource.destname(destRef))
|
val out = destFile(BUNDLE_DIR, destRef, resource.destname(destRef))
|
||||||
|
|
@ -240,6 +252,9 @@ class GATKResourcesBundle extends QScript {
|
||||||
//throw new ReviewedStingException("Unknown file type: " + resource)
|
//throw new ReviewedStingException("Unknown file type: " + resource)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
createCurrentLink(BUNDLE_DIR)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
createBundleDirectories(DOWNLOAD_DIR)
|
createBundleDirectories(DOWNLOAD_DIR)
|
||||||
createDownloadsFromBundle(BUNDLE_DIR, DOWNLOAD_DIR)
|
createDownloadsFromBundle(BUNDLE_DIR, DOWNLOAD_DIR)
|
||||||
|
|
@ -249,7 +264,6 @@ class GATKResourcesBundle extends QScript {
|
||||||
|
|
||||||
def createDownloadsFromBundle(in: File, out: File) {
|
def createDownloadsFromBundle(in: File, out: File) {
|
||||||
Console.printf("Visiting %s%n", in)
|
Console.printf("Visiting %s%n", in)
|
||||||
// todo -- ignore some of the other files too (e.g. *.out); will test next time we make a bundle
|
|
||||||
if (! in.getName.startsWith(".")) {
|
if (! in.getName.startsWith(".")) {
|
||||||
if ( in.isDirectory ) {
|
if ( in.isDirectory ) {
|
||||||
out.mkdirs
|
out.mkdirs
|
||||||
|
|
@ -261,7 +275,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
if ( isBAM(in) ) {
|
if ( isBAM(in) ) {
|
||||||
add(new cpFile(in, out))
|
add(new cpFile(in, out))
|
||||||
add(new md5sum(out))
|
add(new md5sum(out))
|
||||||
} else {
|
} else if ( !isOUT(in) ) {
|
||||||
add(new GzipFile(in, out + ".gz"))
|
add(new GzipFile(in, out + ".gz"))
|
||||||
add(new md5sum(out + ".gz"))
|
add(new md5sum(out + ".gz"))
|
||||||
}
|
}
|
||||||
|
|
@ -299,6 +313,10 @@ class GATKResourcesBundle extends QScript {
|
||||||
def commandLine = "cp %s %s".format(in.getAbsolutePath, out.getAbsolutePath)
|
def commandLine = "cp %s %s".format(in.getAbsolutePath, out.getAbsolutePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class linkFile(@Input val in: File, @Output val out: File) extends CommandLineFunction {
|
||||||
|
def commandLine = "ln -s %s %s".format(in.getAbsolutePath, out.getAbsolutePath)
|
||||||
|
}
|
||||||
|
|
||||||
class md5sum(@Input val in: File) extends CommandLineFunction {
|
class md5sum(@Input val in: File) extends CommandLineFunction {
|
||||||
@Output val o: File = new File(in.getAbsolutePath + ".md5")
|
@Output val o: File = new File(in.getAbsolutePath + ".md5")
|
||||||
def commandLine = "md5sum %s > %s".format(in.getAbsolutePath, o)
|
def commandLine = "md5sum %s > %s".format(in.getAbsolutePath, o)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue