Enabled ReduceByInterval
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@533 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ee2f022c71
commit
05c6679321
|
|
@ -351,6 +351,9 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
* Initialize the output streams as specified by the user.
|
||||
*/
|
||||
private void initializeOutputStreams() {
|
||||
out = System.out;
|
||||
err = System.err;
|
||||
|
||||
if( outErrFileName != null && (outFileName != null || errFileName != null) )
|
||||
throw new IllegalArgumentException("Can't set output/error output file with either out file name or err file name");
|
||||
|
||||
|
|
|
|||
|
|
@ -418,7 +418,7 @@ public abstract class TraversalEngine {
|
|||
samReader.setValidationStringency(strictness);
|
||||
|
||||
final SAMFileHeader header = samReader.getFileHeader();
|
||||
logger.info(String.format("Sort order is: " + header.getSortOrder()));
|
||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||
|
||||
return samReader;
|
||||
}
|
||||
|
|
@ -506,11 +506,29 @@ public abstract class TraversalEngine {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public <M, T> T traverse(Walker<M, T> walker) {
|
||||
ArrayList<GenomeLoc> l = new ArrayList<GenomeLoc>();
|
||||
if ( hasLocations() )
|
||||
l = locs;
|
||||
T sum = null;
|
||||
if ( hasLocations() && walker.ReduceByInterval() ) {
|
||||
logger.info("Doing reduce by interval processing");
|
||||
if ( ! hasLocations() )
|
||||
Utils.scareUser("ReduceByInterval requested by no interval was provided");
|
||||
List<Pair<GenomeLoc, T>> map = new ArrayList<Pair<GenomeLoc, T>>(locs.size());
|
||||
for ( GenomeLoc loc : locs ) {
|
||||
ArrayList<GenomeLoc> l = new ArrayList<GenomeLoc>();
|
||||
l.add(loc);
|
||||
T intervalSum = traverse(walker, l);
|
||||
sum = intervalSum;
|
||||
map.add(new Pair<GenomeLoc, T>(loc, intervalSum));
|
||||
}
|
||||
walker.onTraversalDone(map);
|
||||
} else {
|
||||
ArrayList<GenomeLoc> l = new ArrayList<GenomeLoc>();
|
||||
if ( hasLocations() )
|
||||
l = locs;
|
||||
sum = traverse(walker, l);
|
||||
}
|
||||
|
||||
return traverse(walker, l);
|
||||
printOnTraversalDone("elements", sum);
|
||||
return sum;
|
||||
}
|
||||
|
||||
public <M, T> T traverse(Walker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class TraverseByIntervals extends TraversalEngine {
|
|||
}
|
||||
}
|
||||
|
||||
printOnTraversalDone("intervals", sum);
|
||||
//printOnTraversalDone("intervals", sum);
|
||||
walker.onTraversalDone(sum);
|
||||
return sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class TraverseByLoci extends TraversalEngine {
|
|||
sum = carryWalkerOverInterval(walker, samReadIter, sum, null);
|
||||
}
|
||||
|
||||
printOnTraversalDone("loci", sum);
|
||||
//printOnTraversalDone("loci", sum);
|
||||
walker.onTraversalDone(sum);
|
||||
return sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ public class TraverseByReads extends TraversalEngine {
|
|||
//System.out.printf("Done? %b%n", done);
|
||||
}
|
||||
|
||||
printOnTraversalDone("reads", sum);
|
||||
//printOnTraversalDone("reads", sum);
|
||||
walker.onTraversalDone(sum);
|
||||
return sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class TraverseByReference extends TraverseByLoci {
|
|||
sum = carryWalkerOverReference(walker, sum, null);
|
||||
}
|
||||
|
||||
printOnTraversalDone("reference", sum);
|
||||
//printOnTraversalDone("reference", sum);
|
||||
walker.onTraversalDone(sum);
|
||||
return sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,10 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
return context.getReads().size();
|
||||
}
|
||||
|
||||
public boolean ReduceByInterval() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public Pair<Long, Long> reduceInit() { return new Pair(0l,0l); }
|
||||
|
||||
public Pair<Long, Long> reduce(Integer value, Pair<Long, Long> sum) {
|
||||
|
|
@ -33,6 +37,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>
|
|||
}
|
||||
|
||||
public void onTraversalDone(Pair<Long, Long> result) {
|
||||
out.printf("Average depth of coverage is: %.2f\n", ((double)result.getFirst() / (double)result.getSecond()));
|
||||
out.printf("Average depth of coverage is: %.2f in %d total coverage over %d sites\n",
|
||||
((double)result.getFirst() / (double)result.getSecond()), result.getFirst(), result.getSecond());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,8 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisTK;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -47,7 +52,34 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
}
|
||||
|
||||
public void initialize() { }
|
||||
|
||||
public void onTraversalDone(ReduceType result) {
|
||||
out.println("[REDUCE RESULT] Traversal result is: " + result);
|
||||
}
|
||||
|
||||
/**
|
||||
* General interval reduce routine called after all of the traversals are done
|
||||
* @param results
|
||||
*/
|
||||
public void onTraversalDone(List<Pair<GenomeLoc, ReduceType>> results) {
|
||||
for ( Pair<GenomeLoc, ReduceType> result : results ) {
|
||||
out.printf("[INTERVAL REDUCE RESULT] at %s ", result.getFirst());
|
||||
this.onTraversalDone(result.getSecond());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if your walker wants to reduce each interval separately. Default is false.
|
||||
*
|
||||
* If you set this flag, several things will happen.
|
||||
*
|
||||
* The system will invoke reduceInit() once for each interval being processed, starting a fresh reduce
|
||||
* Reduce will accumulate normally at each map unit in the interval
|
||||
* However, onTraversalDone(reduce) will be called after each interval is processed.
|
||||
* The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done,
|
||||
* which is overloaded here to call onTraversalDone(reduce) for each location
|
||||
*/
|
||||
public boolean ReduceByInterval() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMFileWriterFactory;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
|
||||
* read to its home on the reference and noting the mismatch rate. It ignores reads with
|
||||
* indels in them, treats high and low-quality references bases the same, and does not count
|
||||
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
|
||||
* in short order.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
*/
|
||||
public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWriter>> {
|
||||
@Argument(shortName="nWaysOut",required=false,defaultValue="1")
|
||||
public int nWaysOut;
|
||||
|
||||
@Argument(shortName="readScaling",required=false,defaultValue="1")
|
||||
public float readScaling;
|
||||
|
||||
@Argument(shortName="outputBase", required=true)
|
||||
public String outputBase;
|
||||
|
||||
public long nReadsRead = 0;
|
||||
public long nReadsWritten = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public SAMRecord map(LocusContext context, SAMRecord read) {
|
||||
nReadsRead++;
|
||||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public ArrayList<SAMFileWriter> reduceInit() {
|
||||
SAMFileWriterFactory fact = new SAMFileWriterFactory();
|
||||
ArrayList<SAMFileWriter> outputs = new ArrayList<SAMFileWriter>(nWaysOut);
|
||||
for ( int i = 0; i < nWaysOut; i++ ) {
|
||||
SAMFileHeader header = this.getToolkit().getSamReader().getFileHeader();
|
||||
outputs.add(fact.makeBAMWriter(header, true, new File(outputBase + "." + i + ".bam")));
|
||||
}
|
||||
return outputs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Summarize the error rate data.
|
||||
*
|
||||
*/
|
||||
public ArrayList<SAMFileWriter> reduce(SAMRecord read, ArrayList<SAMFileWriter> outputs) {
|
||||
for ( SAMFileWriter out : outputs ) {
|
||||
if ( readScaling >= 1.0 ) {
|
||||
int nCopies = (int)Math.ceil(readScaling);
|
||||
for ( int i = 0; i < nCopies; i++) {
|
||||
out.addAlignment(read);
|
||||
nReadsWritten++;
|
||||
}
|
||||
} else if ( Math.random() < readScaling ) {
|
||||
out.addAlignment(read);
|
||||
nReadsWritten++;
|
||||
}
|
||||
}
|
||||
|
||||
return outputs;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void onTraversalDone(ArrayList<SAMFileWriter> outputs) {
|
||||
for ( SAMFileWriter out : outputs ) {
|
||||
out.close();
|
||||
}
|
||||
System.out.printf("Reads: read %d written %d%n", nReadsRead, nReadsWritten);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue