BaseUtils: added reverse function to reverse an array of bytes (and not complement it), QualityUtils: split qualToProb into itself and qualToErrorProb, CovariateCounterWalker and LogisticRecalibrationWalker: several changes including proper accounting (only partly complete) for reversing AND complementing bases of negative-strand reads, PrintReadsWalker: created option to output reads to a BAM file rather than just to the screen (useful for creating a downsampled BAM file)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@770 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-05-21 18:30:45 +00:00
parent 7e77c62b49
commit 0219d33e10
5 changed files with 173 additions and 43 deletions

View File

@ -1,17 +1,49 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.cmdLine.Argument;
public class PrintReadsWalker extends ReadWalker<Integer, Integer> {
public Integer map(char[] ref, SAMRecord read) {
out.println(read.format());
return 1;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.io.File;
import java.util.Random;
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
@Argument(fullName="outputBamFile", shortName="of", doc="Write output to this BAM filename instead of STDOUT", required=false)
String outputBamFile = null;
/**
 * Passes each read through unchanged so that {@code reduce} can decide where to emit it.
 *
 * @param ref  reference bases supplied by the traversal; not used here
 * @param read the read currently being visited
 * @return the same {@code read} instance that was passed in
 */
public SAMRecord map(char[] ref, SAMRecord read) {
return read;
}
public Integer reduceInit() { return 0; }
/**
 * Builds the initial reduce value: a BAM writer when an output file was requested,
 * otherwise {@code null}.
 *
 * @return a writer targeting {@code outputBamFile}, or {@code null} when
 *         {@code outputBamFile} is not set
 */
public SAMFileWriter reduceInit() {
    // No BAM destination configured: hand back null as the "no writer" marker.
    if ( outputBamFile == null ) {
        return null;
    }

    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
    SAMFileHeader samHeader = this.getToolkit().getEngine().getSAMHeader();
    File bamDestination = new File(outputBamFile);
    // NOTE(review): the boolean is presumably the "presorted" flag -- confirm against SAMFileWriterFactory.
    return writerFactory.makeBAMWriter(samHeader, true, bamDestination);
}
public Integer reduce(Integer value, Integer sum) {
return value + sum;
/**
 * Emits one read, either to the BAM writer or, when there is no writer, as formatted
 * text on {@code out}.
 *
 * @param read   the read to emit
 * @param output the BAM writer, or {@code null} to print the read instead
 * @return {@code output}, unchanged, so it is carried to the next call
 */
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
    if ( output == null ) {
        // No writer available: fall back to the textual form of the read.
        out.println(read.format());
        return output;
    }

    output.addAlignment(read);
    return output;
}
/**
 * Closes the BAM writer, if one was in use, once the traversal has finished.
 *
 * @param output the final reduce value; {@code null} when no writer was created
 */
public void onTraversalDone(SAMFileWriter output) {
    if ( output == null ) {
        return;
    }
    output.close();
}
}

File diff suppressed because one or more lines are too long

View File

@ -92,15 +92,20 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
byte[] quals = read.getBaseQualities();
byte[] recalQuals = new byte[quals.length];
// Since we want machine direction reads not corrected positive strand reads, rev comp any negative strand reads
if (read.getReadNegativeStrandFlag()) {
bases = BaseUtils.simpleReverseComplement(bases);
quals = BaseUtils.reverse(quals);
}
int numBases = read.getReadLength();
recalQuals[0] = quals[0]; // can't change the first -- no dinuc
recalQuals[numBases-1] = quals[numBases-1]; // can't change last -- no dinuc
for ( int i = 1; i < numBases-1; i++ ) { // skip first and last base, qual already set because no dinuc
//recalQuals[numBases-1] = quals[numBases-1]; // can't change last -- no dinuc
for ( int cycle = 1; cycle < numBases; cycle++ ) { // skip first and last base, qual already set because no dinuc
// Take into account that previous base is the next base in terms of machine chemistry if this is a negative strand
int cycle = read.getReadNegativeStrandFlag() ? numBases - i - 1 : i;
String dinuc = String.format("%c%c", bases[i + (read.getReadNegativeStrandFlag() ? 1 : -1)], bases[i]);
byte qual = quals[i];
//System.out.printf("dinuc %c %c%n", bases[i-1], bases[i]);
//int cycle = i; //read.getReadNegativeStrandFlag() ? numBases - i - 1 : i;
String dinuc = String.format("%c%c", bases[cycle - 1], bases[cycle]);
byte qual = quals[cycle];
LogisticRegressor regressor = regressors.get(dinuc);
byte newQual;
@ -120,9 +125,11 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
newQual = qual;
}
recalQuals[i] = newQual;
recalQuals[cycle] = newQual;
}
if (read.getReadNegativeStrandFlag())
recalQuals = BaseUtils.reverse(quals);
//System.out.printf("OLD: %s%n", read.format());
read.setBaseQualities(recalQuals);
//System.out.printf("NEW: %s%n", read.format());

View File

@ -125,4 +125,19 @@ public class BaseUtils {
return rcbases;
}
/**
 * Reverse a byte array without complementing it. Works for any per-position byte
 * data (bases or quality scores).
 *
 * @param bases the byte array to reverse; it is not modified
 * @return a newly allocated array holding the elements of {@code bases} in reverse order
 */
static public byte[] reverse(byte[] bases) {
    byte[] reversed = new byte[bases.length];
    for (int i = 0; i < bases.length; i++) {
        // Mirror the index: slot i of the result comes from the opposite end of the input.
        reversed[i] = bases[bases.length - 1 - i];
    }
    return reversed;
}
}

View File

@ -20,7 +20,18 @@ public class QualityUtils {
* @return a probability (0.0-1.0)
*/
static public double qualToProb(byte qual) {
    // The probability that the call is correct is the complement of the error probability.
    return 1.0 - qualToErrorProb(qual);
}
/**
 * Convert a quality score to the probability that the call is wrong, computed as
 * 10^(-qual/10). This is the Phred-style conversion, *not* the Illumina-style
 * conversion (though asymptotically, they're the same).
 *
 * @param qual a quality score (0-40)
 * @return a probability (0.0-1.0)
 */
static public double qualToErrorProb(byte qual) {
    final double exponent = qual / -10.0;
    return Math.pow(10.0, exponent);
}
/**