From fa6468d1673a80bb9185890b226640f975865ed2 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 13 Apr 2011 18:47:47 +0000 Subject: [PATCH] Remove the adaptor sequence clipping read filter because it is dangerous (it breaks LocusIteratorByState). We'll bring it back to life when ReadTransformers are created. Instead, have the utility code return a new clipped SAMRecord (necessary so that we don't break SNP calling in UG when the indel caller tries to hard-clip the reads). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5629 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/filters/AdaptorSequenceFilter.java | 44 --------------- .../sting/utils/sam/ReadUtils.java | 55 ++++++++++++++----- 2 files changed, 40 insertions(+), 59 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/filters/AdaptorSequenceFilter.java diff --git a/java/src/org/broadinstitute/sting/gatk/filters/AdaptorSequenceFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/AdaptorSequenceFilter.java deleted file mode 100755 index bf04633ec..000000000 --- a/java/src/org/broadinstitute/sting/gatk/filters/AdaptorSequenceFilter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.filters; - -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; - -/** - * This class doesn't actually filter out reads (it's really a ReadTransformer): it hard-clips the - * reads to remove the adaptor sequences from them. - * - * @author ebanks, depristo - * @version 0.1 - */ - -public class AdaptorSequenceFilter implements SamRecordFilter { - - public boolean filterOut(final SAMRecord rec) { - return ReadUtils.hardClipAdaptorSequence(rec); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 8fa6fcd0f..0fa227fee 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -176,27 +176,41 @@ public class ReadUtils { return new Pair(adaptorStart, adaptorEnd); } - // return true if the read needs to be completely clipped - public static boolean hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) { + /** + * + * @param rec original SAM record + * @param adaptorLength length of adaptor sequence + * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped + */ + public static SAMRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) { Pair adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength); + SAMRecord result = rec; + if ( adaptorBoundaries != null ) { if ( rec.getReadNegativeStrandFlag() && adaptorBoundaries.second >= rec.getAlignmentStart() && adaptorBoundaries.first < rec.getAlignmentEnd() ) - return hardClipStartOfRead(rec, adaptorBoundaries.second); + result = hardClipStartOfRead(rec, adaptorBoundaries.second); else if ( !rec.getReadNegativeStrandFlag() && adaptorBoundaries.first <= rec.getAlignmentEnd() ) - return hardClipEndOfRead(rec, adaptorBoundaries.first); + result = hardClipEndOfRead(rec, adaptorBoundaries.first); } - return false; + return result; } // return true if the read needs to be completely clipped - private static boolean hardClipStartOfRead(SAMRecord rec, int stopPosition) { + private static SAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { - if ( stopPosition >= rec.getAlignmentEnd() ) { + if ( stopPosition >= oldRec.getAlignmentEnd() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it //System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName()); - return true; + return null; + } + + SAMRecord rec; + try { + rec = (SAMRecord)oldRec.clone(); + } catch (Exception e) { + return null; } //System.out.printf("Clipping start of read: %50s start=%d adaptorEnd=%d isize=%d %n", @@ -261,16 +275,22 @@ public class ReadUtils { // adjust the start accordingly rec.setAlignmentStart(stopPosition + 1); - return false; + return rec; } - // return true if the read needs to be completely clipped - private static boolean hardClipEndOfRead(SAMRecord rec, int startPosition) { + private static SAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { - if ( startPosition <= rec.getAlignmentStart() ) { + if ( startPosition <= oldRec.getAlignmentStart() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it //System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName()); - return true; + return null; + } + + SAMRecord rec; + try { + rec = (SAMRecord)oldRec.clone(); + } catch (Exception e) { + return null; } //System.out.printf("Clipping end of read: %50s adaptorStart=%d end=%d isize=%d %n", @@ -341,12 +361,17 @@ public class ReadUtils { // adjust the stop accordingly // rec.setAlignmentEnd(startPosition - 1); - return false; + return rec; } private static int DEFAULT_ADAPTOR_SIZE = 100; - public static boolean hardClipAdaptorSequence(final SAMRecord rec) { + /** + * + * @param rec original SAM record + * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped + */ + public static SAMRecord hardClipAdaptorSequence(final SAMRecord rec) { return hardClipAdaptorSequence(rec, DEFAULT_ADAPTOR_SIZE); }