BaseUtils: added reverse function to reverse an array of bytes (and not complement it), QualityUtils: split qualToProb into itself and qualToErrorProb, CovariateCounterWalker and LogisticRecalibrationWalker: several changes including proper (though only partly complete) accounting for reversing AND complementing bases that are on the negative strand, PrintReadsWalker: created option to output reads to a BAM file rather than just to the screen (useful for creating a downsampled BAM file)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@770 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7e77c62b49
commit
0219d33e10
|
|
@ -1,17 +1,49 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMFileWriterFactory;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
public class PrintReadsWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
out.println(read.format());
|
||||
return 1;
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
import java.util.Random;
|
||||
|
||||
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||
|
||||
@Argument(fullName="outputBamFile", shortName="of", doc="Write output to this BAM filename instead of STDOUT", required=false)
|
||||
String outputBamFile = null;
|
||||
|
||||
public SAMRecord map(char[] ref, SAMRecord read) {
|
||||
return read;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
public SAMFileWriter reduceInit() {
|
||||
if ( outputBamFile != null ) { // ! outputBamFile.equals("") ) {
|
||||
SAMFileWriterFactory fact = new SAMFileWriterFactory();
|
||||
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||
return fact.makeBAMWriter(header, true, new File(outputBamFile));
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
|
||||
if ( output != null ) {
|
||||
output.addAlignment(read);
|
||||
} else {
|
||||
out.println(read.format());
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
public void onTraversalDone(SAMFileWriter output) {
|
||||
if ( output != null ) {
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -92,15 +92,20 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
|
|||
byte[] quals = read.getBaseQualities();
|
||||
byte[] recalQuals = new byte[quals.length];
|
||||
|
||||
// Since we want machine direction reads not corrected positive strand reads, rev comp any negative strand reads
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
bases = BaseUtils.simpleReverseComplement(bases);
|
||||
quals = BaseUtils.reverse(quals);
|
||||
}
|
||||
|
||||
int numBases = read.getReadLength();
|
||||
recalQuals[0] = quals[0]; // can't change the first -- no dinuc
|
||||
recalQuals[numBases-1] = quals[numBases-1]; // can't change last -- no dinuc
|
||||
for ( int i = 1; i < numBases-1; i++ ) { // skip first and last base, qual already set because no dinuc
|
||||
//recalQuals[numBases-1] = quals[numBases-1]; // can't change last -- no dinuc
|
||||
for ( int cycle = 1; cycle < numBases; cycle++ ) { // skip first and last base, qual already set because no dinuc
|
||||
// Take into account that previous base is the next base in terms of machine chemistry if this is a negative strand
|
||||
int cycle = read.getReadNegativeStrandFlag() ? numBases - i - 1 : i;
|
||||
String dinuc = String.format("%c%c", bases[i + (read.getReadNegativeStrandFlag() ? 1 : -1)], bases[i]);
|
||||
byte qual = quals[i];
|
||||
//System.out.printf("dinuc %c %c%n", bases[i-1], bases[i]);
|
||||
//int cycle = i; //read.getReadNegativeStrandFlag() ? numBases - i - 1 : i;
|
||||
String dinuc = String.format("%c%c", bases[cycle - 1], bases[cycle]);
|
||||
byte qual = quals[cycle];
|
||||
LogisticRegressor regressor = regressors.get(dinuc);
|
||||
byte newQual;
|
||||
|
||||
|
|
@ -120,9 +125,11 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
|
|||
newQual = qual;
|
||||
}
|
||||
|
||||
recalQuals[i] = newQual;
|
||||
recalQuals[cycle] = newQual;
|
||||
}
|
||||
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
recalQuals = BaseUtils.reverse(quals);
|
||||
//System.out.printf("OLD: %s%n", read.format());
|
||||
read.setBaseQualities(recalQuals);
|
||||
//System.out.printf("NEW: %s%n", read.format());
|
||||
|
|
|
|||
|
|
@ -125,4 +125,19 @@ public class BaseUtils {
|
|||
|
||||
return rcbases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse a byte array of bases
|
||||
* @param bases the byte array of bases
|
||||
* @return the reverse of the base byte array
|
||||
*/
|
||||
static public byte[] reverse(byte[] bases) {
|
||||
byte[] rcbases = new byte[bases.length];
|
||||
|
||||
for (int i = 0; i < bases.length; i++) {
|
||||
rcbases[i] = bases[bases.length - 1];
|
||||
}
|
||||
|
||||
return rcbases;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,18 @@ public class QualityUtils {
|
|||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
static public double qualToProb(byte qual) {
|
||||
return 1.0 - Math.pow(10.0, ((double) qual)/-10.0);
|
||||
return 1.0 - qualToErrorProb(qual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a quality score to a probability of error. This is the Phred-style
|
||||
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
||||
*
|
||||
* @param qual a quality score (0-40)
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
static public double qualToErrorProb(byte qual) {
|
||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue