From 400399f1b82607965b2185647483538f400f137a Mon Sep 17 00:00:00 2001 From: asivache Date: Fri, 5 Jun 2009 22:04:37 +0000 Subject: [PATCH] fixed (?) a bug in insertion realignment git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@917 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/indels/IntervalCleanerWalker.java | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java index b51bd0ee7..1f40845c5 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java @@ -13,7 +13,10 @@ import net.sf.samtools.*; import java.util.*; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.FileWriter; +import java.io.OutputStream; @WalkerName("IntervalCleaner") public class IntervalCleanerWalker extends LocusWindowWalker { @@ -37,6 +40,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker private FileWriter indelOutput = null; private FileWriter statsOutput = null; + // we need to sort the reads ourselves because SAM headers get messed up and claim to be "unsorted" sometimes private TreeSet readsToWrite; @@ -46,6 +50,10 @@ public class IntervalCleanerWalker extends LocusWindowWalker SAMFileHeader header = getToolkit().getEngine().getSAMHeader(); writer = Utils.createSAMFileWriterWithCompression(header, false, OUT, getToolkit().getBAMCompression()); + + logger.info("Writing into output BAM file at compression level " + getToolkit().getBAMCompression()); + logger.info("Temporary space used: "+System.getProperty("java.io.tmpdir")); + if ( OUT_INDELS != null ) { try { indelOutput = new FileWriter(new File(OUT_INDELS)); @@ -72,8 +80,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker // What do we do with the reads that are not part of our intervals? public void nonIntervalReadAction(SAMRecord read) { - writer.addAlignment(read); - } + try { + writer.addAlignment(read); + } catch (Exception e ) { + logger.error("Failed to write read "+read.getReadName()+" "+read.getReferenceName() +":"+read.getAlignmentStart()); + e.printStackTrace(out); + throw new StingException(e.getMessage()); + } + } public Integer map(RefMetaDataTracker tracker, String ref, LocusContext context) { List reads = context.getReads(); @@ -305,7 +319,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker if ( statsOutput != null ) { try { statsOutput.write(interval.toString()); - statsOutput.write("\tFAIL (bad indel)\t"); + statsOutput.write("\tFAIL (bad indel)\t"); // if improvement > LOD_THRESHOLD *BUT* entropy is not reduced (???) statsOutput.write(Double.toString(improvement)); statsOutput.write("\n"); statsOutput.flush(); @@ -335,7 +349,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker if ( statsOutput != null ) { try { statsOutput.write(interval.toString()); - statsOutput.write("\tCLEAN"); + statsOutput.write("\tCLEAN"); // if improvement > LOD_THRESHOLD *AND* entropy is reduced if ( bestConsensus.cigar.numCigarElements() > 1 ) statsOutput.write(" (found indel)"); statsOutput.write("\t"); @@ -362,7 +376,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker } else if ( statsOutput != null ) { try { statsOutput.write(interval.toString()); - statsOutput.write("\tFAIL\t"); + statsOutput.write("\tFAIL\t"); // if improvement < LOD_THRESHOLD statsOutput.write(Double.toString(improvement)); statsOutput.write("\n"); statsOutput.flush(); @@ -551,8 +565,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker } difference = indelIndex - newIndex; } else if ( ce2.getOperator() == CigarOperator.I ) { - int indelIndex = refIndex + ce1.getLength(); - String indelString = readSeq.substring(indelIndex, indelIndex+ce2.getLength()); + int indelIndex = ce1.getLength(); // this is the position of inserted bases ON THE READ + String indelString = readSeq.substring(indelIndex, indelIndex+ce2.getLength()); // get the inserted bases int newIndex = indelIndex; while ( newIndex > 0 ) { String newIndelString = readSeq.substring(newIndex-1, newIndex+ce2.getLength()-1);