2009-05-13 04:24:18 +08:00
|
|
|
package org.broadinstitute.sting.secondarybase;
|
2009-04-10 04:51:37 +08:00
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMFileReader;
|
|
|
|
|
import net.sf.samtools.SAMFileWriter;
|
|
|
|
|
import net.sf.samtools.SAMFileWriterFactory;
|
2009-05-15 02:58:43 +08:00
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
import org.broadinstitute.sting.utils.QualityUtils;
|
|
|
|
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
|
|
|
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
|
|
|
|
|
|
|
|
|
import java.io.BufferedReader;
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.io.FileReader;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.util.HashMap;
|
2009-04-10 04:51:37 +08:00
|
|
|
|
|
|
|
|
public class CombineSamAndFourProbs extends CommandLineProgram {
|
|
|
|
|
public static CombineSamAndFourProbs Instance = null;
|
|
|
|
|
|
2009-05-07 02:16:11 +08:00
|
|
|
@Argument(fullName="sam", shortName="S", doc="Input SAM file")
|
2009-04-10 04:51:37 +08:00
|
|
|
public File SAM;
|
2009-05-07 02:16:11 +08:00
|
|
|
@Argument(fullName="fourprob", shortName="F", doc="Input text file := read_name sq_field")
|
2009-04-10 04:51:37 +08:00
|
|
|
public File FOURPROBS;
|
2009-05-07 02:16:11 +08:00
|
|
|
@Argument(fullName="sam_out", shortName="O", doc="Output SAM file")
|
2009-04-10 04:51:37 +08:00
|
|
|
public File SAM_OUT;
|
|
|
|
|
|
|
|
|
|
public static void main(String[] argv) {
|
|
|
|
|
Instance = new CombineSamAndFourProbs();
|
|
|
|
|
start(Instance, argv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected int execute() {
|
|
|
|
|
BufferedReader fpreader = null;
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
fpreader = new BufferedReader(new FileReader(FOURPROBS));
|
|
|
|
|
|
|
|
|
|
HashMap fourprobMap = new HashMap(27000000);
|
|
|
|
|
|
|
|
|
|
String fourprobLine;
|
|
|
|
|
int processed = 0;
|
|
|
|
|
while ((fourprobLine = fpreader.readLine()) != null) {
|
|
|
|
|
String[] fourprobPieces = fourprobLine.split("\\s+");
|
|
|
|
|
String[] sqfield = fourprobPieces[1].split(":");
|
|
|
|
|
byte[] sq = hexStringToBytes(sqfield[2]);
|
|
|
|
|
|
|
|
|
|
fourprobMap.put(fourprobPieces[0], sq);
|
|
|
|
|
|
|
|
|
|
if (processed % 1000000 == 0) {
|
|
|
|
|
System.out.println("Processed " + processed);
|
|
|
|
|
}
|
|
|
|
|
processed++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fpreader.close();
|
|
|
|
|
|
|
|
|
|
SAMFileReader sf = new SAMFileReader(SAM);
|
|
|
|
|
sf.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
|
|
|
|
|
|
|
|
|
SAMFileWriter sw = new SAMFileWriterFactory().makeSAMOrBAMWriter(sf.getFileHeader(), true, SAM_OUT);
|
|
|
|
|
|
|
|
|
|
for (SAMRecord sr : sf) {
|
|
|
|
|
String readname = sr.getReadName();
|
|
|
|
|
byte[] sq = (byte[]) fourprobMap.get(readname);
|
|
|
|
|
|
2009-04-10 12:18:20 +08:00
|
|
|
if (sq != null) {
|
|
|
|
|
if (sr.getReadNegativeStrandFlag()) {
|
|
|
|
|
sq = QualityUtils.reverseComplementCompressedQualityArray(sq);
|
|
|
|
|
}
|
2009-04-10 04:51:37 +08:00
|
|
|
|
2009-04-10 12:18:20 +08:00
|
|
|
sr.setAttribute("SQ", sq);
|
|
|
|
|
sw.addAlignment(sr);
|
|
|
|
|
}
|
2009-04-10 04:51:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sf.close();
|
|
|
|
|
sw.close();
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
System.err.println("There was an error.");
|
|
|
|
|
System.exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static String bytesToHexString(final byte[] data) {
|
|
|
|
|
final char[] chars = new char[2 * data.length];
|
|
|
|
|
for (int i = 0; i < data.length; i++) {
|
|
|
|
|
final byte b = data[i];
|
|
|
|
|
chars[2*i] = toHexDigit((b >> 4) & 0xF);
|
|
|
|
|
chars[2*i+1] = toHexDigit(b & 0xF);
|
|
|
|
|
}
|
|
|
|
|
return new String(chars);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static byte[] hexStringToBytes(final String s) throws NumberFormatException {
|
|
|
|
|
if (s.length() % 2 != 0) {
|
|
|
|
|
throw new NumberFormatException("Hex representation of byte string does not have even number of hex chars: " + s);
|
|
|
|
|
}
|
|
|
|
|
final byte[] ret = new byte[s.length() / 2];
|
|
|
|
|
for (int i = 0; i < ret.length; ++i) {
|
|
|
|
|
ret[i] = (byte) ((fromHexDigit(s.charAt(i * 2)) << 4) | fromHexDigit(s.charAt(i * 2 + 1)));
|
|
|
|
|
}
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static char toHexDigit(final int value) {
|
|
|
|
|
return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static int fromHexDigit(final char c) throws NumberFormatException {
|
|
|
|
|
final int ret = Character.digit(c, 16);
|
|
|
|
|
if (ret == -1) {
|
|
|
|
|
throw new NumberFormatException("Not a valid hex digit: " + c);
|
|
|
|
|
}
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|