From f9cdc119afec2d0d4dd76e04f82e138c9cf96dca Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 21 Sep 2011 18:16:42 -0400 Subject: [PATCH] Added a method to ReadUtils that converts reads of the form 10S20M10S to 40M (just unclips the soft-clips). Be careful when using this - if you're writing a bam file it will be potentially written out of order (since the previous alignment start was at the M, not the S). --- .../sting/utils/sam/ReadUtils.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) mode change 100644 => 100755 public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java old mode 100644 new mode 100755 index 5d3ef3086..60c9c1780 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -681,6 +681,9 @@ public class ReadUtils { @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { + if ( read.getCigar().numCigarElements() == 1 && read.getCigar().getCigarElement(0).getOperator().equals(CigarOperator.INSERTION)) { + return read.getUnclippedEnd(); + } int stop = read.getUnclippedStart(); if (readIsEntirelyInsertion(read)) @@ -787,5 +790,47 @@ public class ReadUtils { return readBases; } + public static SAMRecord unclipSoftClippedBases(SAMRecord rec) { + int newReadStart = rec.getAlignmentStart(); + int newReadEnd = rec.getAlignmentEnd(); + List newCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + int heldOver = -1; + boolean sSeen = false; + for ( CigarElement e : rec.getCigar().getCigarElements() ) { + if ( e.getOperator().equals(CigarOperator.S) ) { + newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M)); + if ( sSeen ) { + newReadEnd += e.getLength(); + sSeen = true; + } else { + newReadStart -= e.getLength(); + } + } else { + newCigarElements.add(e); + } + } + // merge duplicate operators together + int idx = 0; + List finalCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + while ( idx < newCigarElements.size() -1 ) { + if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) { + int combSize = newCigarElements.get(idx).getLength(); + int offset = 0; + while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) { + combSize += newCigarElements.get(idx+offset+1).getLength(); + offset++; + } + finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator())); + idx = idx + offset -1; + } else { + finalCigarElements.add(newCigarElements.get(idx)); + } + idx++; + } + rec.setCigar(new Cigar(finalCigarElements)); + rec.setAlignmentStart(newReadStart); + + return rec; + } }