From 1be36ca959a3eab22d9afe01b93ebf0d04dd2321 Mon Sep 17 00:00:00 2001 From: asivache Date: Fri, 13 Nov 2009 15:41:26 +0000 Subject: [PATCH] Bug fix: when cleanedReadsIterator is initialized, it is immediately set to the contig of the first cleaned read; when the first uncleaned read coming in is on a lower contig, this would trigger 'readNextContig' with that lower contig as an argument. As a result, the whole cleaned reads file would be read through to the end and no cleaned reads would ever be seen by the code afterwards. Now we do not call readNextContig if the (uncleaned) read's contig is lower than the current contig already loaded into cleanedReadsIterator. The 'readNextContig' method now also throws an exception if the requested contig is lower than the currently loaded one. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2037 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/indels/CleanedReadInjector.java | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java index 27e3ff49d..e599952f9 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java @@ -97,6 +97,12 @@ public class CleanedReadInjector extends ReadWalker { */ @Override public Integer map(char[] ref, SAMRecord read) { +// boolean DEBUG = false; +// if ( read.getReadName().equals("42NY6AAXX091009:6:73:327:916#0") ) { +// System.out.println(read.toString()); +// System.out.println(getUniquifiedReadName(read)) ; +// DEBUG=true; +// } // first emit reads from the cleaned set if appropriate int cleanedReadCount = 0; @@ -110,11 +116,20 @@ public class CleanedReadInjector extends ReadWalker { firstCleanedRead = cleanedReads.peek(); } - // update the hashes if necessary - 
cleanedReadsIterator.readNextContig(read.getReferenceIndex()); + // update the hashes if necessary; NOTE: cleanedReadsIterator was initialized with the + // contig where the first available cleaned read sits (so it can be any contig, e.g. 22); + // the 'read' below comes from a different file (original, uncleaned reads), so it may also + // be anywhere - including a lower contig, say 1. cleanedReadsIterator cannot scroll back, + // so the statement below will take care of skipping reads on lower contigs until we reach + // the contig of the first cleaned read. + if ( read.getReferenceIndex() > cleanedReadsIterator.getCurrentContig() ) { + cleanedReadsIterator.readNextContig(read.getReferenceIndex()); + } - if ( !cleanedReadHash.contains(getUniquifiedReadName(read)) ) + if ( !cleanedReadHash.contains(getUniquifiedReadName(read)) ) { +// if ( DEBUG ) System.out.println("Not found in hash. Hash size:"+cleanedReadHash.size()); outputBAM.addAlignment(read); + } return cleanedReadCount; } @@ -194,11 +209,17 @@ public class CleanedReadInjector extends ReadWalker { readNextContig(nextRead.getReferenceIndex()); } + int getCurrentContig() { return contig; } + public void readNextContig(int newContig) { + if ( newContig < contig ) + throw new StingException("Requested shift to contig "+newContig+" which is before the current contig "+contig); + // don't do anything if we're in the right contig or have no reads if ( newContig == contig || nextRead == null ) return; + System.out.println("Loading contig "+newContig+"; old contig was "+contig); contig = newContig; cleanedReadHash.clear(); cleanedReads.clear(); @@ -214,6 +235,10 @@ public class CleanedReadInjector extends ReadWalker { nextRead.getReferenceIndex() == contig ) { cleanedReads.add(nextRead); cleanedReadHash.add(getUniquifiedReadName(nextRead)); +// if ( nextRead.getReadName().equals("42NY6AAXX091009:6:73:327:916#0") ) { +// System.out.println("In hash: "+getUniquifiedReadName(nextRead)); +// System.out.println("In hash: 
"+nextRead.toString()); +// } nextRead = (iterator.hasNext() ? iterator.next() : null); } }