Bug fix: when cleanedReadsIterator is initialized, it is immediately set to the contig of the first cleaned read; when the first uncleaned read coming in is on a lower contig, this would trigger 'readNextContig' with that lower contig as an argument. As a result, the whole cleaned reads file would be read through to the end and no cleaned reads would ever be seen by the code afterwards. Now we do not call readNextContig if the (uncleaned) read's contig is lower than the current contig already loaded into cleanedReadsIterator. The 'readNextContig' method now also throws an exception if the requested contig is less than the currently loaded one.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2037 348d0f76-0448-11de-a6fe-93d51630548a
2009-11-13 15:41:26 +00:00 · 2009-11-13 15:41:26 +00:00 · 1be36ca959
parent b1376e4216
commit 1be36ca959
1 changed files with 28 additions and 3 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java
@ -97,6 +97,12 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
     */
    @Override
    public Integer map(char[] ref, SAMRecord read) {
+//        boolean DEBUG = false;
+//        if ( read.getReadName().equals("42NY6AAXX091009:6:73:327:916#0") ) {
+//            System.out.println(read.toString());
+//            System.out.println(getUniquifiedReadName(read)) ;
+//            DEBUG=true;
+//        }

        // first emit reads from the cleaned set if appropriate
        int cleanedReadCount = 0;
@ -110,11 +116,20 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
            firstCleanedRead = cleanedReads.peek();
        }

-        // update the hashes if necessary
-        cleanedReadsIterator.readNextContig(read.getReferenceIndex());
+        // update the hashes if necessary; NOTE: cleanedReadsIterator was initialized with the
+        // contig where the first available cleaned read sits (so it can be any contig, e.g. 22);
+        // the 'read' below comes from a different file (original, uncleaned reads), so it may also
+        // be anywhere - including a lower contig, say 1. cleanedReadsIterator cannot scroll back,
+        // so the statement below will take care of skipping reads on lower contigs until we reach
+        // the contig of the first cleaned read.
+        if ( read.getReferenceIndex() > cleanedReadsIterator.getCurrentContig() ) {
+            cleanedReadsIterator.readNextContig(read.getReferenceIndex());
+        }

-        if ( !cleanedReadHash.contains(getUniquifiedReadName(read)) )
+        if ( !cleanedReadHash.contains(getUniquifiedReadName(read)) )  {
+//            if ( DEBUG ) System.out.println("Not found in hash. Hash size:"+cleanedReadHash.size());
            outputBAM.addAlignment(read);
+        }
        return cleanedReadCount;
    }

@ -194,11 +209,17 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
                readNextContig(nextRead.getReferenceIndex());
        }

+        int getCurrentContig() { return contig; } // reference index of the contig currently loaded by this iterator (set in readNextContig)
+
        public void readNextContig(int newContig) {
+            if ( newContig < contig )
+                throw new StingException("Requested shift to contig "+newContig+" which is before the current contig "+contig);
+
            // don't do anything if we're in the right contig or have no reads
            if ( newContig == contig || nextRead == null )
                return;

+            System.out.println("Loading contig "+newContig+"; old contig was "+contig);
            contig = newContig;
            cleanedReadHash.clear();
            cleanedReads.clear();
@ -214,6 +235,10 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
                    nextRead.getReferenceIndex() == contig ) {
                cleanedReads.add(nextRead);
                cleanedReadHash.add(getUniquifiedReadName(nextRead));
+//                if ( nextRead.getReadName().equals("42NY6AAXX091009:6:73:327:916#0") ) {
+//                    System.out.println("In hash: "+getUniquifiedReadName(nextRead));
+//                    System.out.println("In hash: "+nextRead.toString());
+//                }
                nextRead = (iterator.hasNext() ? iterator.next() : null);
            }
        }