gatk-3.8/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReplaceQuals.java

122 lines
4.4 KiB
Java
Executable File

package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Pair;
import net.sf.samtools.*;
import java.util.HashMap;
import java.io.File;
/**
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
* read to its home on the reference and noting the mismatch rate. It ignores reads with
* indels in them, treats high and low-quality references bases the same, and does not count
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
* in short order.
*
* @author Kiran Garimella
*/
public class ReplaceQuals extends ReadWalker<SAMRecord, SAMFileWriter> {
@Argument(shortName="inputQualsBAM",doc="BAM files containing qualities to be replaced",required=true)
public String inputQualsBAM;
@Argument(shortName="outputBAM", required=false, doc="output BAM file for reads with replaced quals")
public SAMFileWriter outputBAM = null;
public int MAX_READS_TO_LOAD = -1;
private HashMap<String, Pair<SAMRecord, SAMRecord>> readNameToPairs;
private int READ_PRINT_MOD = 100000;
private final boolean DEBUG = false;
public void initialize() {
readNameToPairs = new HashMap<String, Pair<SAMRecord, SAMRecord>>();
SAMFileReader samReader = new SAMFileReader(new File(inputQualsBAM));
samReader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
int nReads = 0;
logger.info("Starting to read inputQualsBAM = " + inputQualsBAM);
for ( SAMRecord read : samReader ) {
//System.out.printf("READ is %s%n", read.format());
final String name = read.getReadName();
Pair<SAMRecord, SAMRecord> binding = readNameToPairs.containsKey(name) ? readNameToPairs.get(name) : new Pair<SAMRecord, SAMRecord>(null, null);
if ( read.getFirstOfPairFlag() ) {
binding.first = read;
} else {
binding.second = read;
}
readNameToPairs.put(name, binding);
if ( ++nReads % READ_PRINT_MOD == 0 ) {
logger.info(String.format(" Read %d reads so far...", nReads));
}
if ( nReads > MAX_READS_TO_LOAD && MAX_READS_TO_LOAD != -1 )
break;
}
logger.info("Done reading input BAM");
}
/**
*
*/
public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
final String name = read.getReadName();
if ( readNameToPairs.containsKey(name) ) {
Pair<SAMRecord, SAMRecord> binding = readNameToPairs.get(name);
SAMRecord qRead = read.getFirstOfPairFlag() ? binding.first : binding.second;
if (qRead != null) {
if ( DEBUG ) {
System.out.printf("Replacing read %s quals with %s%n", read.getReadName(), qRead.getReadName());
System.out.printf("%s%n", read.getReadName());
System.out.printf("%s%n", qRead.getReadName());
System.out.printf("%s%n", read.getReadString());
System.out.printf("%s%n", qRead.getReadString());
System.out.printf("%s%n", read.getBaseQualityString());
System.out.printf("%s%n", qRead.getBaseQualityString());
//if (! read.getReadString().equals(qRead.getReadString()))
// throw new RuntimeException(String.format("BUG: equating %s and %s but bases are different", read.getReadName(), qRead.getReadName()));
}
read.setBaseQualities(qRead.getBaseQualities());
}
}
return read;
}
// -----------------------------------------------------------------------------------------------
// Standard i/o reduce
//
public void onTraversalDone(SAMFileWriter output) {
if ( output != null ) {
output.close();
}
}
public SAMFileWriter reduceInit() {
return outputBAM;
}
/**
*
*
*/
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
if ( output != null ) {
output.addAlignment(read);
} else {
out.println(read.format());
}
return output;
}
}