Recalibrator by NQS. Had this puppy running all afternoon. Thing had got through 100,000,000 reads before I decided to delete my sting tree. *sigh*, a little more delay.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1811 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-10-12 20:55:02 +00:00
parent ee0afba0af
commit 0d73fe69e7
1 changed files with 160 additions and 0 deletions

View File

@ -0,0 +1,160 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Pair;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.StringTokenizer;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: Ghost
* Date: Oct 11, 2009
* Time: 11:04:07 AM
* To change this template use File | Settings | File Templates.
*/
public class NQSRecalibratorWalker extends ReadWalker<SAMRecord,SAMFileWriter> {
@Argument(fullName="recalibrationTable", shortName="rt", doc="Table detailing NQS recalibration by reported, minimum, and maximum", required=true)
String recalFile = null;
@Argument(fullName="outputBamFile", shortName="outputBAM", doc="output BAM file", required=false)
public SAMFileWriter outBam = null;
final static int MIN_RECALIBRATION_OBSERVATIONS = 10000;
final static int WIN_SIDE = 11;
final static int QMAX = 3 + QualityUtils.MAX_REASONABLE_Q_SCORE;
protected byte[][][] RECALIBRATION_TABLE;
public void initialize() {
// initialize the table
RECALIBRATION_TABLE = initializeQualityRecalibrationTable();
BufferedReader reader;
try {
reader = new BufferedReader(new FileReader(recalFile));
} catch (FileNotFoundException e) {
throw new StingException("File "+recalFile+" not found",e);
}
try {
String header = reader.readLine();
logger.debug(header);
while( reader.ready() ) {
StringTokenizer tok = new StringTokenizer(reader.readLine());
int repQ = Integer.valueOf(tok.nextToken());
int minQ = Integer.valueOf(tok.nextToken());
int maxQ = Integer.valueOf(tok.nextToken());
int nObserv = Integer.valueOf(tok.nextToken());
tok.nextToken(); // skip one - empirical mismatch rate
int empQ = Integer.valueOf(tok.nextToken());
if ( nObserv > MIN_RECALIBRATION_OBSERVATIONS ) {
RECALIBRATION_TABLE[repQ][minQ][maxQ] = (byte) empQ;
}
// System.out.println(repQ);
}
} catch (IOException e) {
throw new StingException("File "+recalFile+" could not be read",e);
}
}
public SAMFileWriter reduceInit() {
return outBam;
}
public SAMRecord map(char[] ref, SAMRecord read) {
byte[] initialQualities = read.getBaseQualities();
byte[] finalQualities = getNewQualitiesByNQS(initialQualities);
return recalibrateRead(read, finalQualities);
}
public byte[] getNewQualitiesByNQS( byte [] initQuals ) {
byte [] newQuals = new byte[initQuals.length];
for ( int offset = 0; offset < initQuals.length; offset ++ ) {
newQuals[offset] = qualityByNQS(initQuals,offset);
}
return newQuals;
}
public SAMFileWriter reduce( SAMRecord newRead, SAMFileWriter outwriter ) {
if ( outwriter == null ) {
out.println(newRead);
} else {
outwriter.addAlignment(newRead);
}
return outwriter;
}
public byte qualityByNQS( byte [] quals, int offset ) {
Pair<Byte,Byte> minMaxQuality = getMinMaxQuality(quals,offset);
return nqsLookup( quals[offset] , minMaxQuality.getFirst() , minMaxQuality.getSecond() );
}
public Pair<Byte,Byte> getMinMaxQuality( byte [] quals, int offset ) {
int start;
int end;
if ( offset-WIN_SIDE < 0 ) {
start = 0;
} else {
start = offset - WIN_SIDE;
}
if ( offset + WIN_SIDE > quals.length ) {
end = quals.length;
} else {
end = offset + WIN_SIDE;
}
byte minQuality = Byte.MAX_VALUE;
byte maxQuality = Byte.MIN_VALUE;
for ( int i = start; i < end; i ++ ) {
if ( i != offset ) {
if ( quals[i] < minQuality ) {
minQuality = quals[i];
}
if ( quals[i] > maxQuality ) {
maxQuality = quals[i];
}
}
}
return new Pair<Byte,Byte>(minQuality,maxQuality);
}
public byte nqsLookup( byte repQ, byte minQ, byte maxQ ) {
return RECALIBRATION_TABLE[(int) repQ][(int) minQ][(int) maxQ];
}
public SAMRecord recalibrateRead( SAMRecord read, byte[] newQualities ) {
read.setBaseQualities(newQualities);
return read;
}
private byte[][][] initializeQualityRecalibrationTable() {
byte [][][] table = new byte[QMAX][QMAX][QMAX];
for ( int qrep = 0; qrep < QMAX; qrep ++ ) {
for ( int qmin = 0; qmin < QMAX; qmin ++ ) {
for ( int qmax = 0; qmax < QMAX; qmax ++ ) {
table[qrep][qmin][qmax] = (byte) qrep;
// default value is the reported q-score
}
}
}
return table;
}
}