From 47c4a70ac1eb2decf991b53f76afa510607a57a2 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 30 Jun 2010 15:50:44 +0000 Subject: [PATCH] It turns out that it is legitimately possible for there to be reads that won't overlap within a target interval for cleaning. While we don't want to attempt cleaning, we also don't want to fail. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3682 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/indels/IndelRealigner.java | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 5b10eee1d..380365363 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -264,18 +264,16 @@ public class IndelRealigner extends ReadWalker { if ( doNotTryToClean(read) ) { readsNotToClean.add(read); } else { - readsToClean.add(read, ref.getBases()); + boolean wellCoveredInterval = readsToClean.add(read, ref.getBases()); + if ( !wellCoveredInterval ) + abortCleanForCurrentInterval(); + // add the rods to the list of known variants populateKnownIndels(metaDataTracker, ref); } if ( readsToClean.size() + readsNotToClean.size() >= MAX_READS ) { - // merge the two sets for emission - readsNotToClean.addAll(readsToClean.getReads()); - emit(readsNotToClean); - readsToClean.clear(); - readsNotToClean.clear(); - currentInterval = intervals.hasNext() ? intervals.next() : null; + abortCleanForCurrentInterval(); } } else { // the read is past the current interval @@ -285,6 +283,15 @@ public class IndelRealigner extends ReadWalker { return 0; } + private void abortCleanForCurrentInterval() { + // merge the two sets for emission + readsNotToClean.addAll(readsToClean.getReads()); + emit(readsNotToClean); + readsToClean.clear(); + readsNotToClean.clear(); + currentInterval = intervals.hasNext() ? intervals.next() : null; + } + private boolean doNotTryToClean(SAMRecord read) { return read.getReadUnmappedFlag() || read.getNotPrimaryAlignmentFlag() || @@ -1232,7 +1239,9 @@ public class IndelRealigner extends ReadWalker { public ReadBin() { } - public void add(SAMRecord read, byte[] ref) { + // Return false if we can't process this read bin because the reads are not correctly overlapping. + // This can happen if e.g. there's a large known indel with no overlapping reads. + public boolean add(SAMRecord read, byte[] ref) { reads.add(read); // set up the reference @@ -1243,7 +1252,7 @@ public class IndelRealigner extends ReadWalker { long lastPosWithRefBase = loc.getStart() + reference.length -1; int neededBases = (int)(read.getAlignmentEnd() - lastPosWithRefBase); if ( neededBases > ref.length ) - throw new StingException("Read " + read.getReadName() + " does not overlap the previous read in this interval; please ensure that you are using the same input bam that was used in the RealignerTargetCreator step"); + return false; if ( neededBases > 0 ) { byte[] newReference = new byte[reference.length + neededBases]; System.arraycopy(reference, 0, newReference, 0, reference.length); @@ -1252,6 +1261,8 @@ public class IndelRealigner extends ReadWalker { loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()+neededBases); } } + + return true; } public List getReads() { return reads; }