From 6db9a00a0baa286b625bd40240c46da583b7af8f Mon Sep 17 00:00:00 2001 From: kiran Date: Wed, 8 Apr 2009 21:48:24 +0000 Subject: [PATCH] SAMFileWriter doesn't appear to flush the buffer when its destructor is called. You have to call the close() method. Also, choose a random base for Ns in the forward and reverse strands so that samtools doesn't pitch a fit. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@338 348d0f76-0448-11de-a6fe-93d51630548a --- .../RepairBadlyCombinedSamFile.java | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/RepairBadlyCombinedSamFile.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/RepairBadlyCombinedSamFile.java index 2b09b718a..f70c8cc46 100644 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/RepairBadlyCombinedSamFile.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/RepairBadlyCombinedSamFile.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; import org.broadinstitute.sting.utils.QualityUtils; import java.io.File; +import java.util.Random; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; @@ -15,6 +16,7 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram { public File ASAM; public File OSAM; + private Random rg; public static void main(String[] argv) { Instance = new RepairBadlyCombinedSamFile(); @@ -27,19 +29,21 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram { } protected int execute() { + rg = new Random(); + SAMFileReader asamr = new SAMFileReader(ASAM); asamr.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); SAMFileWriter osamw = new SAMFileWriterFactory().makeSAMOrBAMWriter(asamr.getFileHeader(), false, OSAM); - for (SAMRecord samRecord : asamr) { + byte[] bases = samRecord.getReadBases(); + byte[] newbases = new byte[bases.length]; + if (samRecord.getReadNegativeStrandFlag()) { - byte[] bases = samRecord.getReadBases(); byte[] quals = samRecord.getBaseQualities(); byte[] sq = (byte[]) samRecord.getAttribute("SQ"); - byte[] newbases = new byte[bases.length]; byte[] newquals = new byte[bases.length]; byte[] newsq = new byte[bases.length]; @@ -50,7 +54,7 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram { case 'C': newbase = 'G'; break; case 'G': newbase = 'C'; break; case 'T': newbase = 'A'; break; - default: newbase = 'A'; // hack. This is so downstream applications don't barf on an ambiguous base 'matching' the reference + default: newbase = getRandomBase(); // hack. This is so downstream applications don't barf on an ambiguous base 'matching' the reference } newbases[i] = newbase; newquals[i] = quals[bases.length - i - 1]; @@ -60,11 +64,31 @@ public class RepairBadlyCombinedSamFile extends CommandLineProgram { samRecord.setReadBases(newbases); samRecord.setBaseQualities(newquals); samRecord.setAttribute("SQ", newsq); + } else { + for (int i = 0; i < bases.length; i++) { + newbases[i] = (bases[i] == 'A' || bases[i] == 'C' || bases[i] == 'G' || bases[i] == 'T') ? bases[i] : getRandomBase(); + } + + samRecord.setReadBases(newbases); } osamw.addAlignment(samRecord); } + osamw.close(); + return 0; } + + private byte getRandomBase() { + int baseIndex = rg.nextInt(4); + + switch (baseIndex) { + case 0: return 'A'; + case 1: return 'C'; + case 2: return 'G'; + case 3: return 'T'; + default: return '.'; // unreachable + } + } }