gatk-3.8/java/src/org/broadinstitute/sting/secondarybase/CombineSamAndFourProbs.java

120 lines
4.0 KiB
Java
Raw Normal View History

package org.broadinstitute.sting.secondarybase;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
public class CombineSamAndFourProbs extends CommandLineProgram {
public static CombineSamAndFourProbs Instance = null;
@Argument(fullName="sam", shortName="S", doc="Input SAM file")
public File SAM;
@Argument(fullName="fourprob", shortName="F", doc="Input text file := read_name sq_field")
public File FOURPROBS;
@Argument(fullName="sam_out", shortName="O", doc="Output SAM file")
public File SAM_OUT;
public static void main(String[] argv) {
Instance = new CombineSamAndFourProbs();
start(Instance, argv);
}
protected int execute() {
BufferedReader fpreader = null;
try {
fpreader = new BufferedReader(new FileReader(FOURPROBS));
HashMap fourprobMap = new HashMap(27000000);
String fourprobLine;
int processed = 0;
while ((fourprobLine = fpreader.readLine()) != null) {
String[] fourprobPieces = fourprobLine.split("\\s+");
String[] sqfield = fourprobPieces[1].split(":");
byte[] sq = hexStringToBytes(sqfield[2]);
fourprobMap.put(fourprobPieces[0], sq);
if (processed % 1000000 == 0) {
System.out.println("Processed " + processed);
}
processed++;
}
fpreader.close();
SAMFileReader sf = new SAMFileReader(SAM);
sf.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
SAMFileWriter sw = new SAMFileWriterFactory().makeSAMOrBAMWriter(sf.getFileHeader(), true, SAM_OUT);
for (SAMRecord sr : sf) {
String readname = sr.getReadName();
byte[] sq = (byte[]) fourprobMap.get(readname);
if (sq != null) {
if (sr.getReadNegativeStrandFlag()) {
sq = QualityUtils.reverseComplementCompressedQualityArray(sq);
}
sr.setAttribute("SQ", sq);
sw.addAlignment(sr);
}
}
sf.close();
sw.close();
} catch (IOException e) {
System.err.println("There was an error.");
System.exit(1);
}
return 0;
}
static String bytesToHexString(final byte[] data) {
final char[] chars = new char[2 * data.length];
for (int i = 0; i < data.length; i++) {
final byte b = data[i];
chars[2*i] = toHexDigit((b >> 4) & 0xF);
chars[2*i+1] = toHexDigit(b & 0xF);
}
return new String(chars);
}
static byte[] hexStringToBytes(final String s) throws NumberFormatException {
if (s.length() % 2 != 0) {
throw new NumberFormatException("Hex representation of byte string does not have even number of hex chars: " + s);
}
final byte[] ret = new byte[s.length() / 2];
for (int i = 0; i < ret.length; ++i) {
ret[i] = (byte) ((fromHexDigit(s.charAt(i * 2)) << 4) | fromHexDigit(s.charAt(i * 2 + 1)));
}
return ret;
}
private static char toHexDigit(final int value) {
return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10));
}
private static int fromHexDigit(final char c) throws NumberFormatException {
final int ret = Character.digit(c, 16);
if (ret == -1) {
throw new NumberFormatException("Not a valid hex digit: " + c);
}
return ret;
}
}