From 4ff8b8fc0e4885940341e1e6712a529d704949f6 Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 23 Jul 2010 14:24:01 +0000 Subject: [PATCH] 1. Fixing a bug that Mark found where indel-containing clipped reads would get an original cigar tag even when they didn't actually get modified. 2. Added some useful logging messages. 3. Added a oneoffs walker to calculate the number of realigned reads and intervals containing them. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3860 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/indels/IndelRealigner.java | 11 +- .../walkers/RealignedReadCounter.java | 129 ++++++++++++++++++ 2 files changed, 137 insertions(+), 3 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index c516116b3..424d7c625 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -275,14 +275,17 @@ public class IndelRealigner extends ReadWalker { readsNotToClean.add(read); } else { boolean wellCoveredInterval = readsToClean.add(read, ref.getBases()); - if ( !wellCoveredInterval ) - abortCleanForCurrentInterval(); + if ( !wellCoveredInterval ) { + logger.info("We are aborting the realignment in interval " + currentInterval + " because it is not fully covered by reads"); + abortCleanForCurrentInterval(); + } // add the rods to the list of known variants populateKnownIndels(metaDataTracker, ref); } if ( readsToClean.size() + readsNotToClean.size() >= MAX_READS ) { + logger.info("We are aborting the realignment in interval " + currentInterval + " because there are too many reads (see --maxReadsForRealignment for details)"); abortCleanForCurrentInterval(); } } @@ -1126,8 +1129,10 @@ public class IndelRealigner extends ReadWalker { cigar = reclipCigar(cigar); // no change? - if ( getCigar().equals(cigar) ) + if ( read.getCigar().equals(cigar) ) { + newCigar = null; return; + } // no indel? String str = cigar.toString(); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java new file mode 100755 index 000000000..0a46fc73f --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers; + +import net.sf.samtools.*; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.commandline.Argument; + +import java.io.File; +import java.util.*; + +@By(DataSource.READS) +public class RealignedReadCounter extends ReadWalker { + + public static final String ORIGINAL_CIGAR_TAG = "OC"; + public static final String ORIGINAL_POSITION_TAG = "OP"; + + @Argument(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true) + protected String intervalsFile = null; + + // the intervals input by the user + private Iterator intervals = null; + + // the current interval in the list + private GenomeLoc currentInterval = null; + + private long updatedIntervals = 0, updatedReads = 0; + private boolean intervalWasUpdated = false; + + public void initialize() { + // prepare to read intervals one-by-one, as needed (assuming they are sorted). + intervals = new IntervalFileMergingIterator( new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); + currentInterval = intervals.hasNext() ? intervals.next() : null; + } + + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + if ( currentInterval == null ) { + return 0; + } + + GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); + // hack to get around unmapped reads having screwy locations + if ( readLoc.getStop() == 0 ) + readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart()); + + if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) ) + return 0; + + if ( readLoc.overlapsP(currentInterval) ) { + if ( doNotTryToClean(read) ) + return 0; + + if ( read.getAttribute(ORIGINAL_CIGAR_TAG) != null ) { + String newCigar = (String)read.getAttribute(ORIGINAL_CIGAR_TAG); + // deal with an old bug + if ( read.getCigar().toString().equals(newCigar) ) { + //System.out.println(currentInterval + ": " + read.getReadName() + " " + read.getCigarString() + " " + newCigar); + return 0; + } + + if ( !intervalWasUpdated ) { + intervalWasUpdated = true; + updatedIntervals++; + } + updatedReads++; + + } + } else { + do { + intervalWasUpdated = false; + currentInterval = intervals.hasNext() ? intervals.next() : null; + } while ( currentInterval != null && currentInterval.isBefore(readLoc) ); + } + + return 0; + } + + private boolean doNotTryToClean(SAMRecord read) { + return read.getReadUnmappedFlag() || + read.getNotPrimaryAlignmentFlag() || + read.getReadFailsVendorQualityCheckFlag() || + read.getMappingQuality() == 0 || + read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START || + (BadMateFilter.hasBadMate(read)); + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer value, Integer sum) { + return sum + value; + } + + public void onTraversalDone(Integer result) { + System.out.println(updatedIntervals + " intervals were updated"); + System.out.println(updatedReads + " reads were updated"); + } +} \ No newline at end of file