From dfddf8fd75080b2afbc7cf28b4987e5f792ee2c4 Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 14 Jul 2010 22:31:21 +0000 Subject: [PATCH] - Bring the PaperGenotyper up to code. - Remove some old debugging cruft regarding handling of threaded engine exceptions. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3796 348d0f76-0448-11de-a6fe-93d51630548a --- .../arguments/GATKArgumentCollection.java | 7 -- .../executive/ExceptionAwareThreadPool.java | 65 ------------------- .../executive/HierarchicalMicroScheduler.java | 5 +- .../papergenotyper/GATKPaperGenotyper.java | 55 ++++++++-------- 4 files changed, 27 insertions(+), 105 deletions(-) delete mode 100644 java/src/org/broadinstitute/sting/gatk/executive/ExceptionAwareThreadPool.java diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 576f9be3a..b8ee3437b 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -176,10 +176,6 @@ public class GATKArgumentCollection { @Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching : or a .txt file containing the filter strings one per line.", required = false) public List readGroupBlackList = null; - @Element(required = false) - @Argument(fullName = "enable_threaded_debugging",shortName="etd", doc="Enable debugging of threaded apps by applying exception catching in the threaded version of the GATK.", required = false) - public boolean enableThreadedDebugging = false; - @Element(required = false) @Argument(fullName = "enable_overlap_filters",shortName="eof", doc="Enable automatic removal of bases that overlap adaptor sequence or that overlap their mate pair", required = false) public boolean enableOverlapFilters = false; @@ -353,9 +349,6 @@ public class GATKArgumentCollection { if (other.intervalMerging != this.intervalMerging) { return false; } - if (enableThreadedDebugging != other.enableThreadedDebugging) { - return false; - } if (enableOverlapFilters != other.enableOverlapFilters) { return false; } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/ExceptionAwareThreadPool.java b/java/src/org/broadinstitute/sting/gatk/executive/ExceptionAwareThreadPool.java deleted file mode 100644 index 7c8c2c308..000000000 --- a/java/src/org/broadinstitute/sting/gatk/executive/ExceptionAwareThreadPool.java +++ /dev/null @@ -1,65 +0,0 @@ -package org.broadinstitute.sting.gatk.executive; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; - -import java.util.concurrent.*; - -/** - * an override of the ThreadedPoolExecutor, that throws the exception when an exception is seen in a thread. - */ -public class ExceptionAwareThreadPool extends ThreadPoolExecutor { - /** - * our log, which we want to capture anything from this class - */ - private static Logger logger = Logger.getLogger(ExceptionAwareThreadPool.class); - - public ExceptionAwareThreadPool(int numberOfThreads) { - super(numberOfThreads, numberOfThreads, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue()); - Utils.warnUser("Using the etd (enable threaded debugging) mode and the ExceptionAwareThreadPool is dangerous to " + - "yourself and others. DONT USE IT IN A PRODUCTION SETTING, OR AT ALL!!!!"); - } - - /** - * attempt to determine the fate of a runnable object - * @param r the runnable (in our case a Future object) - * @param t any throwables from the thread. - */ - @Override - public void afterExecute(Runnable r, Throwable t) { - super.afterExecute(r, t); - // from http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6459119. The throwable in the method parameters - // is not actually the thrown exception from the underlying thread, we have to go get the cause. - if (r instanceof Future) { - try { - Object result = ((Future) r).get(); - // once we have the result, we know everything went fine - logger.debug("Thread completed successfully"); - } catch (InterruptedException ie) { - caughtException(ie); - } catch (ExecutionException ee) { - caughtException(ee.getCause()); - } catch (CancellationException ce) { - caughtException(ce); - } - } - } - - /** - * ungracefully shutdown the GATK - * @param e the throwable object, which caused us to fail - */ - public void caughtException(Throwable e) { - // shutdown all the threads, not waiting to finish - this.shutdownNow(); - - // cite the reason we crashed out - logger.fatal("Thread pool caught an exception from a thread: "); - e.printStackTrace(); - - // bail in the ugliest way possible - System.exit(1); - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 6f552ba2f..a6ed2f2b4 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -83,10 +83,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse ) { super(engine, walker, reads, reference, rods); - if (GenomeAnalysisEngine.instance.getArguments().enableThreadedDebugging) - this.threadPool = new ExceptionAwareThreadPool(nThreadsToUse); - else - this.threadPool = Executors.newFixedThreadPool(nThreadsToUse); + this.threadPool = Executors.newFixedThreadPool(nThreadsToUse); try { MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java index 49f97639d..119d416b6 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.papergenotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; @@ -44,14 +45,10 @@ import java.io.PrintStream; * * @author aaron */ -public class GATKPaperGenotyper extends LocusWalker implements TreeReducible { - +public class GATKPaperGenotyper extends LocusWalker implements TreeReducible { // the possible diploid genotype strings private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } - @Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true) - private PrintStream outputStream; - @Argument(fullName = "log_odds_score", shortName = "LOD", doc = "The LOD threshold for us to call confidently a genotype", required = false) private double LODScore = 3.0; @@ -64,10 +61,10 @@ public class GATKPaperGenotyper extends LocusWalker impleme * @param context the locus context, which contains all of the read information * @return a SimpleCall, which stores the genotype we're calling and the LOD score */ - public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case - ReadBackedPileup pileup = context.getPileup(); + ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads(); double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(), DiploidGenotypePriors.HUMAN_HETEROZYGOSITY, 0.01); @@ -78,26 +75,29 @@ public class GATKPaperGenotyper extends LocusWalker impleme // for each genotype, determine it's likelihood value for (GENOTYPE genotype : GENOTYPE.values()) for (int index = 0; index < bases.length; index++) { - if (quals[index] > 0) { - // our epsilon is the de-Phred scored base quality - double epsilon = Math.pow(10, quals[index] / -10.0); + // our epsilon is the de-Phred scored base quality + double epsilon = Math.pow(10, quals[index] / -10.0); - byte pileupBase = bases[index]; - double p = 0; - for (char r : genotype.toString().toCharArray()) - p += r == pileupBase ? 1 - epsilon : epsilon / 3; - likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length()); - } + byte pileupBase = bases[index]; + double p = 0; + for (char r : genotype.toString().toCharArray()) + p += r == pileupBase ? 1 - epsilon : epsilon / 3; + likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length()); } Integer sortedList[] = MathUtils.sortPermutation(likelihoods); // create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString()) // and calculate the LOD score from best - next best (9 and 8 in the sorted list, since the best likelihoods are closest to zero) - return new SimpleCall(context.getLocation(), - GENOTYPE.values()[sortedList[9]].toString(), - likelihoods[sortedList[9]] - likelihoods[sortedList[8]], - ref.getBaseAsChar()); + GENOTYPE selectedGenotype = GENOTYPE.values()[sortedList[sortedList.length-1]]; + double lod = likelihoods[sortedList[sortedList.length-1]] - likelihoods[sortedList[sortedList.length-2]]; + + if (lod > LODScore) { + out.printf("%s\t%s\t%.4f\t%c%n", context.getLocation(), selectedGenotype, lod, (char)ref.getBase()); + return 1; + } + + return 0; } /** @@ -105,22 +105,19 @@ public class GATKPaperGenotyper extends LocusWalker impleme * * @return Initial value of reduce. */ - public Integer reduceInit() { - return 0; + public Long reduceInit() { + return 0L; } /** - * Reduces a single map with the accumulator provided as the ReduceType. We filter out calls, - * first making sure that the call is != null, secondly that the LOD score is above a moderate - * threshold (in this case 3). + * Outputs the number of genotypes called. * * @param value result of the map. * @param sum accumulator for the reduce. * @return accumulator with result of the map taken into account. */ - public Integer reduce(SimpleCall value, Integer sum) { - if (value != null && value.LOD > LODScore) outputStream.println(value.toString()); - return sum + 1; + public Long reduce(Integer value, Long sum) { + return value + sum; } /** @@ -130,7 +127,7 @@ public class GATKPaperGenotyper extends LocusWalker impleme * @param rhs 'right-most' portion of data in the composite reduce. * @return The composite reduce type. */ - public Integer treeReduce(Integer lhs, Integer rhs) { + public Long treeReduce(Long lhs, Long rhs) { return lhs + rhs; }