From 43812a28fcd02617dd23d76df5f51fa46cfc811f Mon Sep 17 00:00:00 2001 From: asivache Date: Wed, 26 Jan 2011 20:07:08 +0000 Subject: [PATCH] If among all the multiple alignments for the given read we have 'unmapped' ones (can happen with bwa 0.5.7 and maybe later versions), then discard the latters and keep only the mapped ones. Keep 'unmapped' only if its the only alignment available. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5090 348d0f76-0448-11de-a6fe-93d51630548a --- .../playground/tools/RemapAlignments.java | 56 ++++++++++++++----- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java b/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java index 631c69b8c..b7acfba43 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java +++ b/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java @@ -29,6 +29,7 @@ import java.io.File; import java.util.Comparator; import java.util.Set; import java.util.TreeSet; +import java.util.Iterator; import java.util.Map.Entry; import org.broadinstitute.sting.playground.utils.GenomicMap; @@ -154,9 +155,9 @@ public class RemapAlignments extends CommandLineProgram { } if ( AlignmentUtils.isReadUnmapped(read) ) totalUnmappedReads++; - // destroy mate pair mapping information, if any (we will need to reconstitute pairs after remapping both ends): - read.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); - read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + // destroy mate pair mapping information, if any (we will need to reconstitute pairs after remapping both ends): + read.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); + read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START); // if ( read.getReadPairedFlag() ) System.out.println("PAIRED READ!!"); totalRecords++; @@ -243,29 +244,54 @@ public class RemapAlignments extends CommandLineProgram { // need to figure out how many best vs inferior alignments are there: int minNM = 1000000; int cnt = 0; // count of best alignments - for ( SAMRecord r : reads ) { + Iterator it = reads.iterator(); + int n = reads.size(); // total number of (alternative) alignments for the given read. + boolean canComputeMapQ = true; + while ( it.hasNext() ) { + SAMRecord r = it.next(); + if ( AlignmentUtils.isReadUnmapped(r) && n > 1) { + // we do not want to keep unmapped records in the set unless it's the last and only record! + it.remove(); + n--; // one less alignment left in the current group of alignments + continue; + } + if ( ! canComputeMapQ ) continue; // some reads were missing NM attribute, so do not bother - we can not compute MapQ Object attr = r.getAttribute("NM"); if ( attr == null ) { - return; // can not recompute qualities! + canComputeMapQ = false; // can not recompute qualities! + continue; + } else { + int nm; + if ( attr instanceof Short ) nm = ((Short)attr).intValue(); + else if ( attr instanceof Integer ) nm = ((Integer)attr).intValue(); + else throw new RuntimeException("NM attribute is neither Short nor Integer, don't know what to do."); + if ( nm < minNM ) { + minNM = nm; + cnt = 1; + } else if ( nm == minNM ) cnt++; } - int nm; - if ( attr instanceof Short ) nm = ((Short)attr).intValue(); - else if ( attr instanceof Integer ) nm = ((Integer)attr).intValue(); - else throw new RuntimeException("NM attribute is neither Short nor Integer, don't know what to do."); - if ( nm < minNM ) { - minNM = nm; - cnt = 1; - } else if ( nm == minNM ) cnt++; } - // now reset counts and quals: + if ( n == 1 ) { + SAMRecord r = reads.iterator().next() ; + if (AlignmentUtils.isReadUnmapped(r) ) { + // special case: we are left with a single unmapped alignment + r.setAttribute("X0", new Integer(0)); + r.setAttribute("X1", new Integer(0)); + return; + } + } + + // now reset counts of available alignments and mapping quals (if we can) in every alignment record: for ( SAMRecord r : reads ) { int cnt2 = reads.size() - cnt; // count of inferior alignments r.setAttribute("X0", new Integer(cnt)); r.setAttribute("X1", new Integer(cnt2)); - + + if ( ! canComputeMapQ ) continue; // not all reads had NM field, so we can not recompute MapQ + if ( cnt2 > 255 ) cnt2 = 255; // otherwise we will be out of bounds in g_log_n int nm_attr;