- Bring the PaperGenotyper up to code.
- Remove some old debugging cruft regarding handling of threaded engine exceptions. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3796 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f65cba6b9a
commit
dfddf8fd75
|
|
@ -176,10 +176,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||
public List<String> readGroupBlackList = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "enable_threaded_debugging",shortName="etd", doc="Enable debugging of threaded apps by applying exception catching in the threaded version of the GATK.", required = false)
|
||||
public boolean enableThreadedDebugging = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "enable_overlap_filters",shortName="eof", doc="Enable automatic removal of bases that overlap adaptor sequence or that overlap their mate pair", required = false)
|
||||
public boolean enableOverlapFilters = false;
|
||||
|
|
@ -353,9 +349,6 @@ public class GATKArgumentCollection {
|
|||
if (other.intervalMerging != this.intervalMerging) {
|
||||
return false;
|
||||
}
|
||||
if (enableThreadedDebugging != other.enableThreadedDebugging) {
|
||||
return false;
|
||||
}
|
||||
if (enableOverlapFilters != other.enableOverlapFilters) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,65 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.executive;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.concurrent.*;
|
||||
|
||||
/**
|
||||
* an override of the ThreadedPoolExecutor, that throws the exception when an exception is seen in a thread.
|
||||
*/
|
||||
public class ExceptionAwareThreadPool extends ThreadPoolExecutor {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(ExceptionAwareThreadPool.class);
|
||||
|
||||
public ExceptionAwareThreadPool(int numberOfThreads) {
|
||||
super(numberOfThreads, numberOfThreads, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
|
||||
Utils.warnUser("Using the etd (enable threaded debugging) mode and the ExceptionAwareThreadPool is dangerous to " +
|
||||
"yourself and others. DONT USE IT IN A PRODUCTION SETTING, OR AT ALL!!!!");
|
||||
}
|
||||
|
||||
/**
|
||||
* attempt to determine the fate of a runnable object
|
||||
* @param r the runnable (in our case a Future object)
|
||||
* @param t any throwables from the thread.
|
||||
*/
|
||||
@Override
|
||||
public void afterExecute(Runnable r, Throwable t) {
|
||||
super.afterExecute(r, t);
|
||||
// from http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6459119. The throwable in the method parameters
|
||||
// is not actually the thrown exception from the underlying thread, we have to go get the cause.
|
||||
if (r instanceof Future<?>) {
|
||||
try {
|
||||
Object result = ((Future<?>) r).get();
|
||||
// once we have the result, we know everything went fine
|
||||
logger.debug("Thread completed successfully");
|
||||
} catch (InterruptedException ie) {
|
||||
caughtException(ie);
|
||||
} catch (ExecutionException ee) {
|
||||
caughtException(ee.getCause());
|
||||
} catch (CancellationException ce) {
|
||||
caughtException(ce);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* ungracefully shutdown the GATK
|
||||
* @param e the throwable object, which caused us to fail
|
||||
*/
|
||||
public void caughtException(Throwable e) {
|
||||
// shutdown all the threads, not waiting to finish
|
||||
this.shutdownNow();
|
||||
|
||||
// cite the reason we crashed out
|
||||
logger.fatal("Thread pool caught an exception from a thread: ");
|
||||
e.printStackTrace();
|
||||
|
||||
// bail in the ugliest way possible
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -83,10 +83,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
|
||||
super(engine, walker, reads, reference, rods);
|
||||
|
||||
if (GenomeAnalysisEngine.instance.getArguments().enableThreadedDebugging)
|
||||
this.threadPool = new ExceptionAwareThreadPool(nThreadsToUse);
|
||||
else
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||
|
||||
try {
|
||||
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.papergenotyper;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
|
|
@ -44,14 +45,10 @@ import java.io.PrintStream;
|
|||
*
|
||||
* @author aaron
|
||||
*/
|
||||
public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> implements TreeReducible<Integer> {
|
||||
|
||||
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
|
||||
// the possible diploid genotype strings
|
||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
|
||||
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
|
||||
private PrintStream outputStream;
|
||||
|
||||
@Argument(fullName = "log_odds_score", shortName = "LOD", doc = "The LOD threshold for us to call confidently a genotype", required = false)
|
||||
private double LODScore = 3.0;
|
||||
|
||||
|
|
@ -64,10 +61,10 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
|||
* @param context the locus context, which contains all of the read information
|
||||
* @return a SimpleCall, which stores the genotype we're calling and the LOD score
|
||||
*/
|
||||
public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads();
|
||||
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||
0.01);
|
||||
|
|
@ -78,26 +75,29 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
|||
// for each genotype, determine its likelihood value
|
||||
for (GENOTYPE genotype : GENOTYPE.values())
|
||||
for (int index = 0; index < bases.length; index++) {
|
||||
if (quals[index] > 0) {
|
||||
// our epsilon is the de-Phred scored base quality
|
||||
double epsilon = Math.pow(10, quals[index] / -10.0);
|
||||
// our epsilon is the de-Phred scored base quality
|
||||
double epsilon = Math.pow(10, quals[index] / -10.0);
|
||||
|
||||
byte pileupBase = bases[index];
|
||||
double p = 0;
|
||||
for (char r : genotype.toString().toCharArray())
|
||||
p += r == pileupBase ? 1 - epsilon : epsilon / 3;
|
||||
likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length());
|
||||
}
|
||||
byte pileupBase = bases[index];
|
||||
double p = 0;
|
||||
for (char r : genotype.toString().toCharArray())
|
||||
p += r == pileupBase ? 1 - epsilon : epsilon / 3;
|
||||
likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length());
|
||||
}
|
||||
|
||||
Integer sortedList[] = MathUtils.sortPermutation(likelihoods);
|
||||
|
||||
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
|
||||
// and calculate the LOD score from best - next best (9 and 8 in the sorted list, since the best likelihoods are closest to zero)
|
||||
return new SimpleCall(context.getLocation(),
|
||||
GENOTYPE.values()[sortedList[9]].toString(),
|
||||
likelihoods[sortedList[9]] - likelihoods[sortedList[8]],
|
||||
ref.getBaseAsChar());
|
||||
GENOTYPE selectedGenotype = GENOTYPE.values()[sortedList[sortedList.length-1]];
|
||||
double lod = likelihoods[sortedList[sortedList.length-1]] - likelihoods[sortedList[sortedList.length-2]];
|
||||
|
||||
if (lod > LODScore) {
|
||||
out.printf("%s\t%s\t%.4f\t%c%n", context.getLocation(), selectedGenotype, lod, (char)ref.getBase());
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -105,22 +105,19 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
|||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
public Long reduceInit() {
|
||||
return 0L;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType. We filter out calls,
|
||||
* first making sure that the call is != null, secondly that the LOD score is above a moderate
|
||||
* threshold (in this case 3).
|
||||
* Outputs the number of genotypes called.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(SimpleCall value, Integer sum) {
|
||||
if (value != null && value.LOD > LODScore) outputStream.println(value.toString());
|
||||
return sum + 1;
|
||||
public Long reduce(Integer value, Long sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -130,7 +127,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
|||
* @param rhs 'right-most' portion of data in the composite reduce.
|
||||
* @return The composite reduce type.
|
||||
*/
|
||||
public Integer treeReduce(Integer lhs, Integer rhs) {
|
||||
public Long treeReduce(Long lhs, Long rhs) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue