/*
 * Provenance: this source was carried inside git patch
 * 095dacd154241ecbc3a552b001934fc4a9eecad6 ("Experimental refactoring.",
 * author kiran, Tue, 12 May 2009 19:46:50 +0000;
 * git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@675
 * 348d0f76-0448-11de-a6fe-93d51630548a), which created
 * java/src/org/broadinstitute/sting/playground/fourbasecaller/AddFourProbsToSAM.java
 * (new file mode 100755, index c9b0e47db).
 */
package org.broadinstitute.sting.playground.fourbasecaller;

import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;

import java.io.File;
import java.util.HashMap;
import java.util.Map;

import net.sf.samtools.*;

/**
 * Command-line tool that copies per-read attributes from an unaligned SAM/BAM file onto the
 * matching records of an aligned SAM/BAM file and writes the result to a third file.
 *
 * <p>Records are matched by read name. For every unaligned record whose name is present among the
 * (mapped, end-filtered) aligned records, the aligned record receives:
 * <ul>
 *   <li>{@code SQ} - the unaligned record's {@code SQ} attribute (the "four-base probs"),
 *       passed through {@link QualityUtils#reverseComplementCompressedQualityArray} when the
 *       aligned read is on the negative strand;</li>
 *   <li>{@code KB} - the unaligned record's read bases;</li>
 *   <li>{@code KQ} - the unaligned record's base qualities.</li>
 * </ul>
 */
public class AddFourProbsToSAM extends CommandLineProgram {
    /** Initial capacity of the read-name index built over the aligned file. */
    private static final int INITIAL_RECORD_CAPACITY = 10000000;

    /** Progress is reported to stderr once per this many processed records. */
    private static final int PROGRESS_INTERVAL = 100000;

    public static AddFourProbsToSAM Instance = null;

    /** Unaligned SAM file (source of the SQ attribute, bases and qualities). */
    public File UNALIGNED_SAM;
    /** Aligned SAM file (records that get annotated; its header is reused for the output). */
    public File ALIGNED_SAM;
    /** Final (output) SAM file. */
    public File FINAL_SAM;
    /** Pair end to process: 0 - all, 1 - first, 2 - second. */
    public int END;
    /** Turn on debugging output (each written record is also printed to stdout). */
    public Boolean DEBUG = false;

    public static void main(String[] argv) {
        Instance = new AddFourProbsToSAM();
        start(Instance, argv);
    }

    /**
     * Argument registration hook. The registrations below are commented out in the original
     * source, so this method currently does nothing.
     * NOTE(review): nothing visible in this file populates the public fields above - confirm
     * that the framework binds them some other way before relying on this tool.
     */
    protected void setupArgs() {
        //m_parser.addRequiredArg("unaligned_sam", "U", "Unaligned SAM file", "UNALIGNED_SAM");
        //m_parser.addRequiredArg("aligned_sam", "A", "Aligned SAM file", "ALIGNED_SAM");
        //m_parser.addRequiredArg("final_sam", "F", "Final SAM file", "FINAL_SAM");
        //m_parser.addRequiredArg("end", "E", "Pair end (0 - all, 1 - first, 2 - second)", "END");
        //m_parser.addOptionalFlag("debug", "D", "Turn on debugging output", "DEBUG");
    }

    /**
     * Indexes the aligned records by read name, then streams the unaligned file and writes every
     * aligned record that has a same-named unaligned counterpart, annotated with SQ/KB/KQ.
     *
     * <p>The output uses the aligned file's header with its sort order set to unsorted. All
     * readers and the writer are closed even if processing fails part-way.
     *
     * @return always 0
     */
    protected int execute() {
        SAMFileReader alignedSf = new SAMFileReader(ALIGNED_SAM);
        try {
            alignedSf.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);

            // First, hash the aligned records (because there are fewer of them than unaligned reads).
            System.err.println("Hashing aligned records...");
            Map<String, SAMRecord> records = indexAlignedRecords(alignedSf);

            // Now, iterate over the unaligned SAM file and stick the four-base probs in.
            System.err.println("\nIterating over unaligned records...");

            SAMFileReader unalignedSf = new SAMFileReader(UNALIGNED_SAM);
            try {
                unalignedSf.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);

                // Output order follows the unaligned file, so the header must not claim a sort order.
                SAMFileHeader swhead = alignedSf.getFileHeader();
                swhead.setSortOrder(SAMFileHeader.SortOrder.unsorted);

                SAMFileWriter sw = new SAMFileWriterFactory().makeSAMOrBAMWriter(swhead, true, FINAL_SAM);
                try {
                    annotateAndWrite(unalignedSf, records, sw);
                } finally {
                    sw.close();
                }
            } finally {
                unalignedSf.close();
            }
        } finally {
            alignedSf.close();
        }

        return 0;
    }

    /**
     * Builds a read-name to record map of the mapped aligned records that pass the END filter.
     *
     * <p>NOTE(review): the key is the read name alone, so if both mates of a pair share a name
     * and END == 0, the later record replaces the earlier one - confirm this is intended.
     */
    private Map<String, SAMRecord> indexAlignedRecords(SAMFileReader alignedSf) {
        Map<String, SAMRecord> records = new HashMap<String, SAMRecord>(INITIAL_RECORD_CAPACITY);

        int processed = 0;
        for (SAMRecord alignedSr : alignedSf) {
            if (isRequestedEnd(alignedSr) && !alignedSr.getReadUnmappedFlag()) {
                records.put(alignedSr.getReadName(), alignedSr);

                reportProgress(processed);
                processed++;
            }
        }

        return records;
    }

    /** Returns true when the record belongs to the pair end selected by END (0 selects all). */
    private boolean isRequestedEnd(SAMRecord record) {
        // END == 0 is tested first so the pair flag is never queried when no filtering is requested.
        if (END == 0) {
            return true;
        }
        return (END == 1 && !record.getSecondOfPairFlag())
            || (END == 2 && record.getSecondOfPairFlag());
    }

    /**
     * Copies SQ/KB/KQ from each unaligned record onto its same-named aligned record and writes the
     * aligned record; unaligned records without an indexed counterpart are skipped.
     */
    private void annotateAndWrite(SAMFileReader unalignedSf, Map<String, SAMRecord> records, SAMFileWriter sw) {
        int processed = 0;
        for (SAMRecord unalignedSr : unalignedSf) {
            // Values stored in the index are never null, so a null result means "no such read".
            SAMRecord alignedSr = records.get(unalignedSr.getReadName());
            if (alignedSr == null) {
                continue;
            }

            byte[] sq = (byte[]) unalignedSr.getAttribute("SQ");
            // Guard against a missing SQ attribute rather than handing null to the utility.
            if (sq != null && alignedSr.getReadNegativeStrandFlag()) {
                sq = QualityUtils.reverseComplementCompressedQualityArray(sq);
            }

            alignedSr.setAttribute("SQ", sq);
            // NOTE(review): unlike SQ, KB/KQ are stored exactly as read from the unaligned record,
            // with no strand adjustment - confirm this is the intended orientation.
            alignedSr.setAttribute("KB", unalignedSr.getReadBases());
            alignedSr.setAttribute("KQ", unalignedSr.getBaseQualities());
            sw.addAlignment(alignedSr);

            // DEBUG is a boxed Boolean; compare null-safely instead of auto-unboxing.
            if (Boolean.TRUE.equals(DEBUG)) {
                System.out.println(alignedSr.format());
            }

            reportProgress(processed);
            processed++;
        }
    }

    /** Prints an in-place (carriage-return terminated) progress line every PROGRESS_INTERVAL records. */
    private static void reportProgress(int processed) {
        if (processed % PROGRESS_INTERVAL == 0) {
            System.err.print("\tProcessed " + processed + " records.\r");
        }
    }
}