Recalibrator by NQS. Had this puppy running all afternoon. It had gotten through 100,000,000 reads before I decided to delete my Sting tree. *sigh* A little more delay.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1811 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ee0afba0af
commit
0d73fe69e7
|
|
@ -0,0 +1,160 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.FileReader;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.StringTokenizer;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileWriter;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: Ghost
|
||||||
|
* Date: Oct 11, 2009
|
||||||
|
* Time: 11:04:07 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class NQSRecalibratorWalker extends ReadWalker<SAMRecord,SAMFileWriter> {
|
||||||
|
@Argument(fullName="recalibrationTable", shortName="rt", doc="Table detailing NQS recalibration by reported, minimum, and maximum", required=true)
|
||||||
|
String recalFile = null;
|
||||||
|
@Argument(fullName="outputBamFile", shortName="outputBAM", doc="output BAM file", required=false)
|
||||||
|
public SAMFileWriter outBam = null;
|
||||||
|
|
||||||
|
final static int MIN_RECALIBRATION_OBSERVATIONS = 10000;
|
||||||
|
final static int WIN_SIDE = 11;
|
||||||
|
final static int QMAX = 3 + QualityUtils.MAX_REASONABLE_Q_SCORE;
|
||||||
|
|
||||||
|
protected byte[][][] RECALIBRATION_TABLE;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
// initialize the table
|
||||||
|
RECALIBRATION_TABLE = initializeQualityRecalibrationTable();
|
||||||
|
BufferedReader reader;
|
||||||
|
try {
|
||||||
|
reader = new BufferedReader(new FileReader(recalFile));
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new StingException("File "+recalFile+" not found",e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
String header = reader.readLine();
|
||||||
|
logger.debug(header);
|
||||||
|
while( reader.ready() ) {
|
||||||
|
StringTokenizer tok = new StringTokenizer(reader.readLine());
|
||||||
|
int repQ = Integer.valueOf(tok.nextToken());
|
||||||
|
int minQ = Integer.valueOf(tok.nextToken());
|
||||||
|
int maxQ = Integer.valueOf(tok.nextToken());
|
||||||
|
int nObserv = Integer.valueOf(tok.nextToken());
|
||||||
|
tok.nextToken(); // skip one - empirical mismatch rate
|
||||||
|
int empQ = Integer.valueOf(tok.nextToken());
|
||||||
|
if ( nObserv > MIN_RECALIBRATION_OBSERVATIONS ) {
|
||||||
|
RECALIBRATION_TABLE[repQ][minQ][maxQ] = (byte) empQ;
|
||||||
|
}
|
||||||
|
// System.out.println(repQ);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new StingException("File "+recalFile+" could not be read",e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMFileWriter reduceInit() {
|
||||||
|
return outBam;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord map(char[] ref, SAMRecord read) {
|
||||||
|
byte[] initialQualities = read.getBaseQualities();
|
||||||
|
byte[] finalQualities = getNewQualitiesByNQS(initialQualities);
|
||||||
|
return recalibrateRead(read, finalQualities);
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getNewQualitiesByNQS( byte [] initQuals ) {
|
||||||
|
byte [] newQuals = new byte[initQuals.length];
|
||||||
|
for ( int offset = 0; offset < initQuals.length; offset ++ ) {
|
||||||
|
newQuals[offset] = qualityByNQS(initQuals,offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
return newQuals;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMFileWriter reduce( SAMRecord newRead, SAMFileWriter outwriter ) {
|
||||||
|
if ( outwriter == null ) {
|
||||||
|
out.println(newRead);
|
||||||
|
} else {
|
||||||
|
outwriter.addAlignment(newRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
return outwriter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte qualityByNQS( byte [] quals, int offset ) {
|
||||||
|
Pair<Byte,Byte> minMaxQuality = getMinMaxQuality(quals,offset);
|
||||||
|
return nqsLookup( quals[offset] , minMaxQuality.getFirst() , minMaxQuality.getSecond() );
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<Byte,Byte> getMinMaxQuality( byte [] quals, int offset ) {
|
||||||
|
int start;
|
||||||
|
int end;
|
||||||
|
if ( offset-WIN_SIDE < 0 ) {
|
||||||
|
start = 0;
|
||||||
|
} else {
|
||||||
|
start = offset - WIN_SIDE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( offset + WIN_SIDE > quals.length ) {
|
||||||
|
end = quals.length;
|
||||||
|
} else {
|
||||||
|
end = offset + WIN_SIDE;
|
||||||
|
}
|
||||||
|
|
||||||
|
byte minQuality = Byte.MAX_VALUE;
|
||||||
|
byte maxQuality = Byte.MIN_VALUE;
|
||||||
|
|
||||||
|
for ( int i = start; i < end; i ++ ) {
|
||||||
|
if ( i != offset ) {
|
||||||
|
if ( quals[i] < minQuality ) {
|
||||||
|
minQuality = quals[i];
|
||||||
|
}
|
||||||
|
if ( quals[i] > maxQuality ) {
|
||||||
|
maxQuality = quals[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair<Byte,Byte>(minQuality,maxQuality);
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte nqsLookup( byte repQ, byte minQ, byte maxQ ) {
|
||||||
|
return RECALIBRATION_TABLE[(int) repQ][(int) minQ][(int) maxQ];
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord recalibrateRead( SAMRecord read, byte[] newQualities ) {
|
||||||
|
read.setBaseQualities(newQualities);
|
||||||
|
return read;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[][][] initializeQualityRecalibrationTable() {
|
||||||
|
byte [][][] table = new byte[QMAX][QMAX][QMAX];
|
||||||
|
for ( int qrep = 0; qrep < QMAX; qrep ++ ) {
|
||||||
|
for ( int qmin = 0; qmin < QMAX; qmin ++ ) {
|
||||||
|
for ( int qmax = 0; qmax < QMAX; qmax ++ ) {
|
||||||
|
table[qrep][qmin][qmax] = (byte) qrep;
|
||||||
|
// default value is the reported q-score
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return table;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue