SAMFileWriter doesn't appear to flush the buffer when its destructor is called. You have to call the close() method. Also, choose a random base for Ns in the forward and reverse strands so that samtools doesn't pitch a fit.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@338 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-04-08 21:48:24 +00:00
parent eb2f0ebd62
commit 6db9a00a0b
1 changed files with 28 additions and 4 deletions

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import org.broadinstitute.sting.utils.QualityUtils;
import java.io.File;
import java.util.Random;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
@ -15,6 +16,7 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram {
public File ASAM;
public File OSAM;
private Random rg;
public static void main(String[] argv) {
Instance = new RepairBadlyCombinedSamFile();
@ -27,19 +29,21 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram {
}
protected int execute() {
rg = new Random();
SAMFileReader asamr = new SAMFileReader(ASAM);
asamr.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
SAMFileWriter osamw = new SAMFileWriterFactory().makeSAMOrBAMWriter(asamr.getFileHeader(), false, OSAM);
for (SAMRecord samRecord : asamr) {
byte[] bases = samRecord.getReadBases();
byte[] newbases = new byte[bases.length];
if (samRecord.getReadNegativeStrandFlag()) {
byte[] bases = samRecord.getReadBases();
byte[] quals = samRecord.getBaseQualities();
byte[] sq = (byte[]) samRecord.getAttribute("SQ");
byte[] newbases = new byte[bases.length];
byte[] newquals = new byte[bases.length];
byte[] newsq = new byte[bases.length];
@ -50,7 +54,7 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram {
case 'C': newbase = 'G'; break;
case 'G': newbase = 'C'; break;
case 'T': newbase = 'A'; break;
default: newbase = 'A'; // hack. This is so downstream applications don't barf on an ambiguous base 'matching' the reference
default: newbase = getRandomBase(); // hack. This is so downstream applications don't barf on an ambiguous base 'matching' the reference
}
newbases[i] = newbase;
newquals[i] = quals[bases.length - i - 1];
@ -60,11 +64,31 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram {
samRecord.setReadBases(newbases);
samRecord.setBaseQualities(newquals);
samRecord.setAttribute("SQ", newsq);
} else {
for (int i = 0; i < bases.length; i++) {
newbases[i] = (bases[i] == 'A' || bases[i] == 'C' || bases[i] == 'G' || bases[i] == 'T') ? bases[i] : getRandomBase();
}
samRecord.setReadBases(newbases);
}
osamw.addAlignment(samRecord);
}
osamw.close();
return 0;
}
private byte getRandomBase() {
int baseIndex = rg.nextInt(4);
switch (baseIndex) {
case 0: return 'A';
case 1: return 'C';
case 2: return 'G';
case 3: return 'T';
default: return '.'; // unreachable
}
}
}