diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java index 41a85d39f..144f2fb79 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java @@ -3,8 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.indels; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.*; import net.sf.samtools.SAMRecord; import net.sf.samtools.AlignmentBlock; @@ -17,8 +16,8 @@ import java.util.List; @WalkerName("IndelIntervals") public class IndelIntervalWalker extends ReadWalker { - @Argument(fullName="maxReadLength", shortName="maxRead", doc="max read length", required=false) - public int maxReadLength = -1; + @Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false) + public boolean allow454 = false; @Argument(fullName="minIndelsPerInterval", shortName="minIndels", doc="min indels per interval", required=false) public int minIntervalIndelCount = 1; @@ -26,9 +25,9 @@ public class IndelIntervalWalker extends ReadWalker 1); // indel + read.getAlignmentBlocks().size() > 1 && // indel + (allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ); } public Interval map(char[] ref, SAMRecord read) { @@ -92,4 +91,4 @@ public class IndelIntervalWalker extends ReadWalker { - @Argument(fullName="maxReadLength", shortName="maxRead", doc="max read length", required=false) - public int maxReadLength = -1; + @Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false) + public boolean allow454 = false; @Argument(fullName="OutputCleaned", shortName="O", required=false, doc="Output file (sam or bam) for improved (realigned) reads") public String OUT = null; @Argument(fullName="OutputIndels", shortName="indels", required=false, doc="Output file (text) for the indels found") public String OUT_INDELS = null; - @Argument(fullName="OutputAllReads", shortName="all", doc="print out all reads (otherwise, just those within the intervals)", required=false) - public boolean printAllReads = false; @Argument(fullName="OutputCleanedReadsOnly", shortName="cleanedOnly", doc="print out cleaned reads only (otherwise, all reads within the intervals)", required=false) public boolean cleanedReadsOnly = false; @Argument(fullName="statisticsFile", shortName="stats", doc="print out statistics (what does or doesn't get cleaned)", required=false) @@ -39,8 +37,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker public int MAX_CONSENSUSES = 30; @Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false) public int MAX_READS_FOR_CONSENSUSES = 120; - @Argument(fullName="noOutputSorting", shortName="nosort", doc="if specified, the output bam file may be not fully sorted. Use if low on temp space", required=false) - public boolean NOSORT=false; public static final int MAX_QUAL = 99; @@ -62,12 +58,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker SAMFileHeader header = getToolkit().getEngine().getSAMHeader(); if ( OUT != null ) { - if ( NOSORT ) { - header.setSortOrder(SortOrder.unsorted); - writer = Utils.createSAMFileWriterWithCompression(header, true, OUT, getToolkit().getBAMCompression()); - } else { - writer = Utils.createSAMFileWriterWithCompression(header, false, OUT, getToolkit().getBAMCompression()); - } + writer = Utils.createSAMFileWriterWithCompression(header, true, OUT, getToolkit().getBAMCompression()); readsToWrite = new TreeSet(); } @@ -104,34 +95,16 @@ public class IntervalCleanerWalker extends LocusWindowWalker } } - // do we care about reads that are not part of our intervals? - public boolean actOnNonIntervalReads() { - return printAllReads && !cleanedReadsOnly; - } - - // What do we do with the reads that are not part of our intervals? - public void nonIntervalReadAction(SAMRecord read) { - if ( writer != null ) { - try { - writer.addAlignment(read); - } catch (Exception e ) { - logger.error("Failed to write read "+read.getReadName()+" aligned at "+read.getReferenceName() +":"+read.getAlignmentStart()); - e.printStackTrace(out); - throw new StingException(e.getMessage()); - } - } - } - public Integer map(RefMetaDataTracker tracker, String ref, LocusContext context) { List reads = context.getReads(); ArrayList goodReads = new ArrayList(); for ( SAMRecord read : reads ) { - if ( (maxReadLength < 0 || read.getReadLength() <= maxReadLength) && - !read.getReadUnmappedFlag() && + if ( !read.getReadUnmappedFlag() && !read.getNotPrimaryAlignmentFlag() && !read.getDuplicateReadFlag() && read.getMappingQuality() != 0 && - read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START ) + read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START && + (allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ) goodReads.add(read); else if ( writer != null && !cleanedReadsOnly ) readsToWrite.add(new ComparableSAMRecord(read)); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java new file mode 100755 index 000000000..001d4a19e --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java @@ -0,0 +1,110 @@ +package org.broadinstitute.sting.playground.gatk.walkers.indels; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.cmdLine.Argument; + +import java.util.*; +/** + * User: ebanks + * Date: Jun 10, 2009 + * Time: 2:40:19 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Merges intervals based on reads which overlap them. + */ +@WalkerName("IntervalMerger") +@Requires({DataSource.READS}) +public class IntervalMergerWalker extends ReadWalker { + + @Argument(fullName="intervalsToMerge", shortName="intervals", doc="Intervals to merge", required=true) + String intervalsSource = null; + @Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false) + public boolean allow454 = false; + @Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="max interval size", required=false) + public int maxIntervalSize = 500; + + private LinkedList intervals; + private GenomeLoc firstInterval = null; + + @Override + public void initialize() { + intervals = parseIntervals(intervalsSource); + firstInterval = intervals.removeFirst(); + } + + @Override + public Integer map(char[] ref, SAMRecord read) { + if ( firstInterval == null || + (!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ) + return 0; + + GenomeLoc loc = GenomeLocParser.createGenomeLoc(read); + + // emit all intervals which we've passed + while ( loc.isPast(firstInterval) ) { + if ( firstInterval.getStop() - firstInterval.getStart() < maxIntervalSize) + out.println(firstInterval); + if ( intervals.size() > 0 ) { + firstInterval = intervals.removeFirst(); + } else { + firstInterval = null; + return 0; + } + } + + // if we're not yet in the first interval, we're done + if ( !loc.overlapsP(firstInterval)) + return 0; + + // at this point, we're in the first interval. + // now we can merge any other intervals which we overlap + while ( intervals.size() > 0 && loc.overlapsP(intervals.getFirst()) ) + firstInterval.setStop(intervals.removeFirst().getStop()); + + return 1; + } + + @Override + public Integer reduceInit() { return 0; } + + @Override + public Integer reduce( Integer value, Integer accum ) { + return accum + value; + } + + @Override + public void onTraversalDone( Integer value ) { + if ( firstInterval != null && + firstInterval.getStop() - firstInterval.getStart() < maxIntervalSize) + out.println(firstInterval); + } + + /** + * Load the intervals directly from the command-line or from file, as appropriate. + * Merge overlapping intervals. + * @param intervalsSource Source of intervals. + * @return a linked list of sorted, merged intervals. + */ + private LinkedList parseIntervals(String intervalsSource) { + List parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false); + GenomeLocSortedSet intervalSortedSet = new GenomeLocSortedSet(); + for ( GenomeLoc parsedInterval : parsedIntervals ) + intervalSortedSet.addRegion(parsedInterval); + + return new LinkedList( intervalSortedSet ); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java index 1d5eb2639..73a78e7f8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java @@ -17,6 +17,8 @@ public class MismatchIntervalWalker extends LocusWalker public int windowSize = 10; @Argument(fullName="mismatchFraction", shortName="mismatch", doc="fraction of mismatching base qualities threshold", required=false) public double mismatchThreshold = 0.15; + @Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false) + public boolean allow454 = false; private final int minReadsAtInterval = 4; @@ -32,7 +34,9 @@ public class MismatchIntervalWalker extends LocusWalker int goodReads = 0, mismatchQualities = 0, totalQualities = 0; for (int i = 0; i < reads.size(); i++) { SAMRecord read = reads.get(i); - if ( read.getMappingQuality() == 0 || read.getAlignmentBlocks().size() > 1 ) + if ( read.getMappingQuality() == 0 || + read.getAlignmentBlocks().size() > 1 || + (!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ) continue; goodReads++; @@ -105,4 +109,4 @@ public class MismatchIntervalWalker extends LocusWalker return sum; } -} \ No newline at end of file +}