From 05c667932176649b91c8846a544c96ce0b5fecb6 Mon Sep 17 00:00:00 2001 From: depristo Date: Fri, 24 Apr 2009 21:39:44 +0000 Subject: [PATCH] Enabled ReduceByInterval git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@533 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisTK.java | 3 + .../gatk/traversals/TraversalEngine.java | 28 ++++-- .../gatk/traversals/TraverseByIntervals.java | 2 +- .../sting/gatk/traversals/TraverseByLoci.java | 2 +- .../gatk/traversals/TraverseByReads.java | 2 +- .../gatk/traversals/TraverseByReference.java | 2 +- .../gatk/walkers/DepthOfCoverageWalker.java | 7 +- .../sting/gatk/walkers/Walker.java | 32 +++++++ .../sting/gatk/walkers/WriteBAM.java | 90 +++++++++++++++++++ 9 files changed, 158 insertions(+), 10 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/WriteBAM.java diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java index 101440671..857efbd4e 100644 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisTK.java @@ -351,6 +351,9 @@ public class GenomeAnalysisTK extends CommandLineProgram { * Initialize the output streams as specified by the user. */ private void initializeOutputStreams() { + out = System.out; + err = System.err; + if( outErrFileName != null && (outFileName != null || errFileName != null) ) throw new IllegalArgumentException("Can't set output/error output file with either out file name or err file name"); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 9b6d1f078..e164ed8da 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -418,7 +418,7 @@ public abstract class TraversalEngine { samReader.setValidationStringency(strictness); final SAMFileHeader header = samReader.getFileHeader(); - logger.info(String.format("Sort order is: " + header.getSortOrder())); + logger.debug(String.format("Sort order is: " + header.getSortOrder())); return samReader; } @@ -506,11 +506,29 @@ public abstract class TraversalEngine { // -------------------------------------------------------------------------------------------------------------- public T traverse(Walker walker) { - ArrayList l = new ArrayList(); - if ( hasLocations() ) - l = locs; + T sum = null; + if ( hasLocations() && walker.ReduceByInterval() ) { + logger.info("Doing reduce by interval processing"); + if ( ! hasLocations() ) + Utils.scareUser("ReduceByInterval requested by no interval was provided"); + List> map = new ArrayList>(locs.size()); + for ( GenomeLoc loc : locs ) { + ArrayList l = new ArrayList(); + l.add(loc); + T intervalSum = traverse(walker, l); + sum = intervalSum; + map.add(new Pair(loc, intervalSum)); + } + walker.onTraversalDone(map); + } else { + ArrayList l = new ArrayList(); + if ( hasLocations() ) + l = locs; + sum = traverse(walker, l); + } - return traverse(walker, l); + printOnTraversalDone("elements", sum); + return sum; } public T traverse(Walker walker, ArrayList locations) { diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByIntervals.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByIntervals.java index 8d2c15ed0..9cdf2c954 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByIntervals.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByIntervals.java @@ -85,7 +85,7 @@ public class TraverseByIntervals extends TraversalEngine { } } - printOnTraversalDone("intervals", sum); + //printOnTraversalDone("intervals", sum); walker.onTraversalDone(sum); return sum; } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLoci.java index 0fef71eaf..7cc847cd1 100644 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLoci.java @@ -93,7 +93,7 @@ public class TraverseByLoci extends TraversalEngine { sum = carryWalkerOverInterval(walker, samReadIter, sum, null); } - printOnTraversalDone("loci", sum); + //printOnTraversalDone("loci", sum); walker.onTraversalDone(sum); return sum; } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java index 24a86ee3a..839b06736 100644 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java @@ -115,7 +115,7 @@ public class TraverseByReads extends TraversalEngine { //System.out.printf("Done? %b%n", done); } - printOnTraversalDone("reads", sum); + //printOnTraversalDone("reads", sum); walker.onTraversalDone(sum); return sum; } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReference.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReference.java index e49490b30..8964b4fea 100644 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReference.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReference.java @@ -64,7 +64,7 @@ public class TraverseByReference extends TraverseByLoci { sum = carryWalkerOverReference(walker, sum, null); } - printOnTraversalDone("reference", sum); + //printOnTraversalDone("reference", sum); walker.onTraversalDone(sum); return sum; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java index 2be4e6dc7..d5f33a836 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java @@ -24,6 +24,10 @@ public class DepthOfCoverageWalker extends LocusWalker return context.getReads().size(); } + public boolean ReduceByInterval() { + return true; + } + public Pair reduceInit() { return new Pair(0l,0l); } public Pair reduce(Integer value, Pair sum) { @@ -33,6 +37,7 @@ public class DepthOfCoverageWalker extends LocusWalker } public void onTraversalDone(Pair result) { - out.printf("Average depth of coverage is: %.2f\n", ((double)result.getFirst() / (double)result.getSecond())); + out.printf("Average depth of coverage is: %.2f in %d total coverage over %d sites\n", + ((double)result.getFirst() / (double)result.getSecond()), result.getFirst(), result.getSecond()); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 329645481..c61719869 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -1,8 +1,13 @@ package org.broadinstitute.sting.gatk.walkers; import java.io.PrintStream; +import java.util.HashMap; +import java.util.Map; +import java.util.List; import org.broadinstitute.sting.gatk.GenomeAnalysisTK; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Pair; /** * Created by IntelliJ IDEA. @@ -47,7 +52,34 @@ public abstract class Walker { } public void initialize() { } + public void onTraversalDone(ReduceType result) { out.println("[REDUCE RESULT] Traversal result is: " + result); } + + /** + * General interval reduce routine called after all of the traversals are done + * @param results + */ + public void onTraversalDone(List> results) { + for ( Pair result : results ) { + out.printf("[INTERVAL REDUCE RESULT] at %s ", result.getFirst()); + this.onTraversalDone(result.getSecond()); + } + } + + /** + * Return true if your walker wants to reduce each interval separately. Default is false. + * + * If you set this flag, several things will happen. + * + * The system will invoke reduceInit() once for each interval being processed, starting a fresh reduce + * Reduce will accumulate normally at each map unit in the interval + * However, onTraversalDone(reduce) will be called after each interval is processed. + * The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done, + * which is overloaded here to call onTraversalDone(reduce) for each location + */ + public boolean ReduceByInterval() { + return false; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/WriteBAM.java b/java/src/org/broadinstitute/sting/gatk/walkers/WriteBAM.java new file mode 100644 index 000000000..ba8bc3f24 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/WriteBAM.java @@ -0,0 +1,90 @@ +package org.broadinstitute.sting.playground.gatk.walkers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.QualityUtils; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMFileWriterFactory; +import net.sf.samtools.SAMFileHeader; +import edu.mit.broad.picard.reference.ReferenceSequence; + +import java.util.ArrayList; +import java.util.Random; +import java.io.File; + +/** + * ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the + * read to its home on the reference and noting the mismatch rate. It ignores reads with + * indels in them, treats high and low-quality references bases the same, and does not count + * ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads + * in short order. + * + * @author Kiran Garimella + */ +public class IOCrusherWalker extends ReadWalker> { + @Argument(shortName="nWaysOut",required=false,defaultValue="1") + public int nWaysOut; + + @Argument(shortName="readScaling",required=false,defaultValue="1") + public float readScaling; + + @Argument(shortName="outputBase", required=true) + public String outputBase; + + public long nReadsRead = 0; + public long nReadsWritten = 0; + + /** + * + */ + public SAMRecord map(LocusContext context, SAMRecord read) { + nReadsRead++; + return read; + } + + /** + * + */ + public ArrayList reduceInit() { + SAMFileWriterFactory fact = new SAMFileWriterFactory(); + ArrayList outputs = new ArrayList(nWaysOut); + for ( int i = 0; i < nWaysOut; i++ ) { + SAMFileHeader header = this.getToolkit().getSamReader().getFileHeader(); + outputs.add(fact.makeBAMWriter(header, true, new File(outputBase + "." + i + ".bam"))); + } + return outputs; + } + + /** + * Summarize the error rate data. + * + */ + public ArrayList reduce(SAMRecord read, ArrayList outputs) { + for ( SAMFileWriter out : outputs ) { + if ( readScaling >= 1.0 ) { + int nCopies = (int)Math.ceil(readScaling); + for ( int i = 0; i < nCopies; i++) { + out.addAlignment(read); + nReadsWritten++; + } + } else if ( Math.random() < readScaling ) { + out.addAlignment(read); + nReadsWritten++; + } + } + + return outputs; + } + + /** + * + */ + public void onTraversalDone(ArrayList outputs) { + for ( SAMFileWriter out : outputs ) { + out.close(); + } + System.out.printf("Reads: read %d written %d%n", nReadsRead, nReadsWritten); + } +} \ No newline at end of file