- Bring the PaperGenotyper up to code.
- Remove some old debugging cruft regarding handling of threaded engine exceptions.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3796 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f65cba6b9a
commit
dfddf8fd75
|
|
@ -176,10 +176,6 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
@Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||||
public List<String> readGroupBlackList = null;
|
public List<String> readGroupBlackList = null;
|
||||||
|
|
||||||
@Element(required = false)
|
|
||||||
@Argument(fullName = "enable_threaded_debugging",shortName="etd", doc="Enable debugging of threaded apps by applying exception catching in the threaded version of the GATK.", required = false)
|
|
||||||
public boolean enableThreadedDebugging = false;
|
|
||||||
|
|
||||||
@Element(required = false)
|
@Element(required = false)
|
||||||
@Argument(fullName = "enable_overlap_filters",shortName="eof", doc="Enable automatic removal of bases that overlap adaptor sequence or that overlap their mate pair", required = false)
|
@Argument(fullName = "enable_overlap_filters",shortName="eof", doc="Enable automatic removal of bases that overlap adaptor sequence or that overlap their mate pair", required = false)
|
||||||
public boolean enableOverlapFilters = false;
|
public boolean enableOverlapFilters = false;
|
||||||
|
|
@ -353,9 +349,6 @@ public class GATKArgumentCollection {
|
||||||
if (other.intervalMerging != this.intervalMerging) {
|
if (other.intervalMerging != this.intervalMerging) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (enableThreadedDebugging != other.enableThreadedDebugging) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (enableOverlapFilters != other.enableOverlapFilters) {
|
if (enableOverlapFilters != other.enableOverlapFilters) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.executive;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
|
|
||||||
import java.util.concurrent.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An override of ThreadPoolExecutor that, when a task run on one of its threads fails, reports the underlying exception and exits the JVM.
|
|
||||||
*/
|
|
||||||
public class ExceptionAwareThreadPool extends ThreadPoolExecutor {
|
|
||||||
/**
|
|
||||||
* our log, which we want to capture anything from this class
|
|
||||||
*/
|
|
||||||
private static Logger logger = Logger.getLogger(ExceptionAwareThreadPool.class);
|
|
||||||
|
|
||||||
public ExceptionAwareThreadPool(int numberOfThreads) {
|
|
||||||
super(numberOfThreads, numberOfThreads, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
|
|
||||||
Utils.warnUser("Using the etd (enable threaded debugging) mode and the ExceptionAwareThreadPool is dangerous to " +
|
|
||||||
"yourself and others. DONT USE IT IN A PRODUCTION SETTING, OR AT ALL!!!!");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* attempt to determine the fate of a runnable object
|
|
||||||
* @param r the runnable (in our case a Future object)
|
|
||||||
* @param t any throwables from the thread.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void afterExecute(Runnable r, Throwable t) {
|
|
||||||
super.afterExecute(r, t);
|
|
||||||
// from http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6459119. The throwable in the method parameters
|
|
||||||
// is not actually the thrown exception from the underlying thread, we have to go get the cause.
|
|
||||||
if (r instanceof Future<?>) {
|
|
||||||
try {
|
|
||||||
Object result = ((Future<?>) r).get();
|
|
||||||
// once we have the result, we know everything went fine
|
|
||||||
logger.debug("Thread completed successfully");
|
|
||||||
} catch (InterruptedException ie) {
|
|
||||||
caughtException(ie);
|
|
||||||
} catch (ExecutionException ee) {
|
|
||||||
caughtException(ee.getCause());
|
|
||||||
} catch (CancellationException ce) {
|
|
||||||
caughtException(ce);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* ungracefully shutdown the GATK
|
|
||||||
* @param e the throwable object, which caused us to fail
|
|
||||||
*/
|
|
||||||
public void caughtException(Throwable e) {
|
|
||||||
// shutdown all the threads, not waiting to finish
|
|
||||||
this.shutdownNow();
|
|
||||||
|
|
||||||
// cite the reason we crashed out
|
|
||||||
logger.fatal("Thread pool caught an exception from a thread: ");
|
|
||||||
e.printStackTrace();
|
|
||||||
|
|
||||||
// bail in the ugliest way possible
|
|
||||||
System.exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -83,10 +83,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
|
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
|
||||||
super(engine, walker, reads, reference, rods);
|
super(engine, walker, reads, reference, rods);
|
||||||
|
|
||||||
if (GenomeAnalysisEngine.instance.getArguments().enableThreadedDebugging)
|
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||||
this.threadPool = new ExceptionAwareThreadPool(nThreadsToUse);
|
|
||||||
else
|
|
||||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
|
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.papergenotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
|
|
@ -44,14 +45,10 @@ import java.io.PrintStream;
|
||||||
*
|
*
|
||||||
* @author aaron
|
* @author aaron
|
||||||
*/
|
*/
|
||||||
public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> implements TreeReducible<Integer> {
|
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
|
||||||
|
|
||||||
// the possible diploid genotype strings
|
// the possible diploid genotype strings
|
||||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||||
|
|
||||||
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
|
|
||||||
private PrintStream outputStream;
|
|
||||||
|
|
||||||
@Argument(fullName = "log_odds_score", shortName = "LOD", doc = "The LOD threshold for us to call confidently a genotype", required = false)
|
@Argument(fullName = "log_odds_score", shortName = "LOD", doc = "The LOD threshold for us to call confidently a genotype", required = false)
|
||||||
private double LODScore = 3.0;
|
private double LODScore = 3.0;
|
||||||
|
|
||||||
|
|
@ -64,10 +61,10 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
||||||
* @param context the locus context, which contains all of the read information
|
* @param context the locus context, which contains all of the read information
|
||||||
* @return a SimpleCall, which stores the genotype we're calling and the LOD score
|
* @return 1 if a genotype was called with a LOD score above the threshold (the call is also printed to the output), 0 otherwise; null when the reference base is N
|
||||||
*/
|
*/
|
||||||
public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
|
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
|
||||||
|
|
||||||
ReadBackedPileup pileup = context.getPileup();
|
ReadBackedPileup pileup = context.getBasePileup().getPileupWithoutMappingQualityZeroReads();
|
||||||
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
||||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||||
0.01);
|
0.01);
|
||||||
|
|
@ -78,26 +75,29 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
||||||
// for each genotype, determine its likelihood value
|
// for each genotype, determine its likelihood value
|
||||||
for (GENOTYPE genotype : GENOTYPE.values())
|
for (GENOTYPE genotype : GENOTYPE.values())
|
||||||
for (int index = 0; index < bases.length; index++) {
|
for (int index = 0; index < bases.length; index++) {
|
||||||
if (quals[index] > 0) {
|
// our epsilon is the de-Phred scored base quality
|
||||||
// our epsilon is the de-Phred scored base quality
|
double epsilon = Math.pow(10, quals[index] / -10.0);
|
||||||
double epsilon = Math.pow(10, quals[index] / -10.0);
|
|
||||||
|
|
||||||
byte pileupBase = bases[index];
|
byte pileupBase = bases[index];
|
||||||
double p = 0;
|
double p = 0;
|
||||||
for (char r : genotype.toString().toCharArray())
|
for (char r : genotype.toString().toCharArray())
|
||||||
p += r == pileupBase ? 1 - epsilon : epsilon / 3;
|
p += r == pileupBase ? 1 - epsilon : epsilon / 3;
|
||||||
likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length());
|
likelihoods[genotype.ordinal()] += Math.log10(p / genotype.toString().length());
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Integer sortedList[] = MathUtils.sortPermutation(likelihoods);
|
Integer sortedList[] = MathUtils.sortPermutation(likelihoods);
|
||||||
|
|
||||||
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
|
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
|
||||||
// and calculate the LOD score from best - next best (9 and 8 in the sorted list, since the best likelihoods are closest to zero)
|
// and calculate the LOD score from best - next best (9 and 8 in the sorted list, since the best likelihoods are closest to zero)
|
||||||
return new SimpleCall(context.getLocation(),
|
GENOTYPE selectedGenotype = GENOTYPE.values()[sortedList[sortedList.length-1]];
|
||||||
GENOTYPE.values()[sortedList[9]].toString(),
|
double lod = likelihoods[sortedList[sortedList.length-1]] - likelihoods[sortedList[sortedList.length-2]];
|
||||||
likelihoods[sortedList[9]] - likelihoods[sortedList[8]],
|
|
||||||
ref.getBaseAsChar());
|
if (lod > LODScore) {
|
||||||
|
out.printf("%s\t%s\t%.4f\t%c%n", context.getLocation(), selectedGenotype, lod, (char)ref.getBase());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -105,22 +105,19 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
||||||
*
|
*
|
||||||
* @return Initial value of reduce.
|
* @return Initial value of reduce.
|
||||||
*/
|
*/
|
||||||
public Integer reduceInit() {
|
public Long reduceInit() {
|
||||||
return 0;
|
return 0L;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reduces a single map with the accumulator provided as the ReduceType. We filter out calls,
|
* Outputs the number of genotypes called.
|
||||||
* first making sure that the call is != null, secondly that the LOD score is above a moderate
|
|
||||||
* threshold (in this case 3).
|
|
||||||
*
|
*
|
||||||
* @param value result of the map.
|
* @param value result of the map.
|
||||||
* @param sum accumulator for the reduce.
|
* @param sum accumulator for the reduce.
|
||||||
* @return accumulator with result of the map taken into account.
|
* @return accumulator with result of the map taken into account.
|
||||||
*/
|
*/
|
||||||
public Integer reduce(SimpleCall value, Integer sum) {
|
public Long reduce(Integer value, Long sum) {
|
||||||
if (value != null && value.LOD > LODScore) outputStream.println(value.toString());
|
return value + sum;
|
||||||
return sum + 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -130,7 +127,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
||||||
* @param rhs 'right-most' portion of data in the composite reduce.
|
* @param rhs 'right-most' portion of data in the composite reduce.
|
||||||
* @return The composite reduce type.
|
* @return The composite reduce type.
|
||||||
*/
|
*/
|
||||||
public Integer treeReduce(Integer lhs, Integer rhs) {
|
public Long treeReduce(Long lhs, Long rhs) {
|
||||||
return lhs + rhs;
|
return lhs + rhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue