- Bring the PaperGenotyper up to code.

- Remove some old debugging cruft regarding handling of threaded engine exceptions.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3796 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-07-14 22:31:21 +00:00
parent f65cba6b9a
commit dfddf8fd75
4 changed files with 27 additions and 105 deletions

View File

@ -176,10 +176,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
public List<String> readGroupBlackList = null;
@Element(required = false)
@Argument(fullName = "enable_threaded_debugging",shortName="etd", doc="Enable debugging of threaded apps by applying exception catching in the threaded version of the GATK.", required = false)
public boolean enableThreadedDebugging = false;
@Element(required = false)
@Argument(fullName = "enable_overlap_filters",shortName="eof", doc="Enable automatic removal of bases that overlap adaptor sequence or that overlap their mate pair", required = false)
public boolean enableOverlapFilters = false;
@ -353,9 +349,6 @@ public class GATKArgumentCollection {
if (other.intervalMerging != this.intervalMerging) {
return false;
}
if (enableThreadedDebugging != other.enableThreadedDebugging) {
return false;
}
if (enableOverlapFilters != other.enableOverlapFilters) {
return false;
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.gatk.executive;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import java.util.concurrent.*;
/**
 * An override of {@link ThreadPoolExecutor} with a fixed number of threads that treats the failure
 * of any task as fatal: the pool is shut down, the failure is logged and the JVM exits.
 *
 * Intended purely as a debugging aid; constructing one emits a user-visible warning.
 */
public class ExceptionAwareThreadPool extends ThreadPoolExecutor {
    /**
     * our log, which we want to capture anything from this class
     */
    private static final Logger logger = Logger.getLogger(ExceptionAwareThreadPool.class);

    /**
     * Creates a pool whose core and maximum size are both numberOfThreads, backed by an
     * unbounded queue, and warns the user that this pool should not be used in production.
     *
     * @param numberOfThreads the number of worker threads in the pool
     */
    public ExceptionAwareThreadPool(int numberOfThreads) {
        super(numberOfThreads, numberOfThreads, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
        Utils.warnUser("Using the etd (enable threaded debugging) mode and the ExceptionAwareThreadPool is dangerous to " +
                "yourself and others. DONT USE IT IN A PRODUCTION SETTING, OR AT ALL!!!!");
    }

    /**
     * attempt to determine the fate of a runnable object
     *
     * @param r the runnable (in our case a Future object)
     * @param t any throwable that escaped the runnable directly; null for tasks wrapped in a Future
     */
    @Override
    public void afterExecute(Runnable r, Throwable t) {
        super.afterExecute(r, t);

        // A task handed over via execute() reports its failure through t; don't lose it.
        if (t != null) {
            caughtException(t);
            return;
        }

        // from http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6459119. For submitted tasks the throwable in
        // the method parameters is not the exception thrown by the task; it is stored in the Future, so we have
        // to go get the cause.
        if (r instanceof Future<?>) {
            try {
                ((Future<?>) r).get();
                // once we have the result, we know everything went fine
                logger.debug("Thread completed successfully");
            } catch (InterruptedException ie) {
                // keep the interrupt status visible to anything that runs before the JVM goes down
                Thread.currentThread().interrupt();
                caughtException(ie);
            } catch (ExecutionException ee) {
                // getCause() may legitimately be null; fall back to the wrapper so we never pass null along
                Throwable cause = ee.getCause();
                caughtException(cause != null ? cause : ee);
            } catch (CancellationException ce) {
                caughtException(ce);
            }
        }
    }

    /**
     * ungracefully shutdown the GATK
     *
     * @param e the throwable object, which caused us to fail
     */
    public void caughtException(Throwable e) {
        // shutdown all the threads, not waiting to finish
        this.shutdownNow();

        // cite the reason we crashed out; hand the throwable to the logger so the stack trace
        // ends up in the same place as the message
        logger.fatal("Thread pool caught an exception from a thread: ", e);

        // bail in the ugliest way possible
        System.exit(1);
    }
}

View File

@ -83,10 +83,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(engine, walker, reads, reference, rods);
if (GenomeAnalysisEngine.instance.getArguments().enableThreadedDebugging)
this.threadPool = new ExceptionAwareThreadPool(nThreadsToUse);
else
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
try {
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.papergenotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
@ -44,14 +45,10 @@ import java.io.PrintStream;
*
* @author aaron
*/
public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> implements TreeReducible<Integer> {
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
// the possible diploid genotype strings
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
private PrintStream outputStream;
@Argument(fullName = "log_odds_score", shortName = "LOD", doc = "The LOD threshold for us to call confidently a genotype", required = false)
private double LODScore = 3.0;
@ -64,10 +61,10 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
* @param context the locus context, which contains all of the read information
* @return 1 if a genotype was called with a LOD score above the threshold (the call is written to the output), 0 otherwise; null if the reference base is N
*/
public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
ReadBackedPileup pileup = context.getPileup();
ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads();
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
0.01);
@ -78,26 +75,29 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
// for each genotype, determine its likelihood value
for (GENOTYPE genotype : GENOTYPE.values())
for (int index = 0; index < bases.length; index++) {
if (quals[index] > 0) {
// our epsilon is the de-Phred scored base quality
double epsilon = Math.pow(10, quals[index] / -10.0);
// our epsilon is the de-Phred scored base quality
double epsilon = Math.pow(10, quals[index] / -10.0);
byte pileupBase = bases[index];
double p = 0;
for (char r : genotype.toString().toCharArray())
p += r == pileupBase ? 1 - epsilon : epsilon / 3;
likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length());
}
byte pileupBase = bases[index];
double p = 0;
for (char r : genotype.toString().toCharArray())
p += r == pileupBase ? 1 - epsilon : epsilon / 3;
likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length());
}
Integer sortedList[] = MathUtils.sortPermutation(likelihoods);
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
// and calculate the LOD score from best - next best (9 and 8 in the sorted list, since the best likelihoods are closest to zero)
return new SimpleCall(context.getLocation(),
GENOTYPE.values()[sortedList[9]].toString(),
likelihoods[sortedList[9]] - likelihoods[sortedList[8]],
ref.getBaseAsChar());
GENOTYPE selectedGenotype = GENOTYPE.values()[sortedList[sortedList.length-1]];
double lod = likelihoods[sortedList[sortedList.length-1]] - likelihoods[sortedList[sortedList.length-2]];
if (lod > LODScore) {
out.printf("%s\t%s\t%.4f\t%c%n", context.getLocation(), selectedGenotype, lod, (char)ref.getBase());
return 1;
}
return 0;
}
/**
@ -105,22 +105,19 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
*
* @return Initial value of reduce.
*/
public Integer reduceInit() {
return 0;
public Long reduceInit() {
return 0L;
}
/**
* Reduces a single map with the accumulator provided as the ReduceType. We filter out calls,
* first making sure that the call is != null, secondly that the LOD score is above a moderate
* threshold (in this case 3).
* Outputs the number of genotypes called.
*
* @param value result of the map.
* @param sum accumulator for the reduce.
* @return accumulator with result of the map taken into account.
*/
public Integer reduce(SimpleCall value, Integer sum) {
if (value != null && value.LOD > LODScore) outputStream.println(value.toString());
return sum + 1;
public Long reduce(Integer value, Long sum) {
return value + sum;
}
/**
@ -130,7 +127,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
* @param rhs 'right-most' portion of data in the composite reduce.
* @return The composite reduce type.
*/
public Integer treeReduce(Integer lhs, Integer rhs) {
public Long treeReduce(Long lhs, Long rhs) {
return lhs + rhs;
}