merge development branches of log-less HMM and FastGatherer to master
This commit is contained in:
commit
81eef3aa37
|
|
@ -28,7 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pairhmm.*;
|
||||
|
|
@ -95,12 +97,12 @@ public class LikelihoodCalculationEngine {
|
|||
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
|
||||
//if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
|
||||
// evaluate the likelihood of the reads given those haplotypes
|
||||
stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue(), sampleEntry.getKey()));
|
||||
stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue()));
|
||||
}
|
||||
return stratifiedReadMap;
|
||||
}
|
||||
|
||||
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample ) {
|
||||
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads) {
|
||||
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
|
||||
final int numHaplotypes = haplotypes.size();
|
||||
|
|
|
|||
|
|
@ -195,7 +195,7 @@ public abstract class CommandLineProgram {
|
|||
clp.setupLoggerLevel(layout);
|
||||
|
||||
Class[] argumentSources = clp.getArgumentSources();
|
||||
for (Class argumentSource : argumentSources)
|
||||
for (Class argumentSource : argumentSources)
|
||||
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
|
||||
parsedArgs = parser.parse(args);
|
||||
|
||||
|
|
|
|||
|
|
@ -35,5 +35,6 @@ import java.lang.annotation.*;
|
|||
@Target({ElementType.FIELD})
|
||||
public @interface Gather {
|
||||
Class value() default Gather.class;
|
||||
String className() default "";
|
||||
boolean enabled() default true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,94 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: ami
|
||||
* Date: 1/3/13
|
||||
* Time: 1:51 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
@By(DataSource.REFERENCE_ORDERED_DATA)
|
||||
public class CoveredByNSamplesSites extends RodWalker<Integer,Integer> implements TreeReducible<Integer> {
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
@Output(fullName = "OutputIntervals", shortName = "intervals", doc = "Name of file for output intervals", required = true)
|
||||
File intervalFile;
|
||||
|
||||
@ArgumentCollection
|
||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
|
||||
@Argument(fullName = "minCoverage", shortName = "minCov",doc = "only samples that have coverage higher then the min are taking into account", required = false)
|
||||
int minCoverage = 10;
|
||||
|
||||
@Argument(fullName = "samplePercentage", shortName = "percentage",doc = "only sites that have more than samplePercentage samples with minCoverage", required = false)
|
||||
double samplePercentage = 0.9;
|
||||
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context){
|
||||
if(tracker == null)
|
||||
return 0;
|
||||
|
||||
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||
if (vcs.size() == 0)
|
||||
return 0;
|
||||
|
||||
for (VariantContext vc : vcs){
|
||||
int countCoveredSamples = 0;
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
final int numOfGenotype = genotypes.size();
|
||||
for (Genotype g : genotypes){
|
||||
if (g.getDP() > minCoverage)
|
||||
countCoveredSamples++;
|
||||
}
|
||||
if (countCoveredSamples/numOfGenotype >= samplePercentage)
|
||||
out.print(ref.getLocus());
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
|
||||
@Override
|
||||
public Integer treeReduce(Integer lhs, Integer rhs) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tell the user the number of loci processed and close out the new variants file.
|
||||
*
|
||||
* @param result the number of loci seen.
|
||||
*/
|
||||
public void onTraversalDone(Integer result) {
|
||||
logger.info("Processed " + result + " loci.\n");
|
||||
}
|
||||
}
|
||||
|
|
@ -157,6 +157,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
/**
|
||||
* A raw, unfiltered, highly sensitive callset in VCF format.
|
||||
*/
|
||||
@Gather(className = "org.broadinstitute.sting.queue.extensions.gatk.CatVariantsGatherer")
|
||||
@Output(doc="File to which variants should be written",required=true)
|
||||
protected VariantContextWriter writer = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pairhmm.ExactPairHMM;
|
||||
import org.broadinstitute.sting.utils.pairhmm.LoglessCachingPairHMM;
|
||||
import org.broadinstitute.sting.utils.pairhmm.OriginalPairHMM;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -101,8 +102,11 @@ public class PairHMMIndelErrorModel {
|
|||
break;
|
||||
case CACHING:
|
||||
case LOGLESS_CACHING:
|
||||
pairHMM = new LoglessCachingPairHMM();
|
||||
System.err.println("warning: this option (LOGLESS_CACHING in UG) is still under development");
|
||||
break;
|
||||
default:
|
||||
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL and EXACT.");
|
||||
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING (the third option is still under development).");
|
||||
}
|
||||
|
||||
// fill gap penalty table, affine naive model:
|
||||
|
|
@ -332,6 +336,7 @@ public class PairHMMIndelErrorModel {
|
|||
getContextHomopolymerLength(readBases,hrunProfile);
|
||||
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
||||
|
||||
boolean firstHap = true;
|
||||
for (Allele a: haplotypeMap.keySet()) {
|
||||
|
||||
Haplotype haplotype = haplotypeMap.get(a);
|
||||
|
|
@ -371,7 +376,7 @@ public class PairHMMIndelErrorModel {
|
|||
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals,
|
||||
(read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
|
||||
(read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
|
||||
contextLogGapContinuationProbabilities, startIndexInHaplotype, false);
|
||||
contextLogGapContinuationProbabilities, startIndexInHaplotype, firstHap);
|
||||
|
||||
|
||||
if (DEBUG) {
|
||||
|
|
@ -383,6 +388,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
perReadAlleleLikelihoodMap.add(p, a, readLikelihood);
|
||||
readLikelihoods[readIdx][j++] = readLikelihood;
|
||||
firstHap = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -205,7 +205,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
|
||||
genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
|
||||
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
|
||||
logger.info("Priority string not provided, using arbitrary genotyping order: "+priority);
|
||||
logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority);
|
||||
}
|
||||
|
||||
samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.gatk;
|
||||
|
||||
import net.sf.samtools.BAMIndex;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import org.broad.tribble.Tribble;
|
||||
|
|
@ -119,9 +118,14 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
|
||||
for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType))
|
||||
if (!argumentSource.isDeprecated()) {
|
||||
Class<?> gatherer = null;
|
||||
if (argumentSource.field.isAnnotationPresent(Gather.class))
|
||||
gatherer = argumentSource.field.getAnnotation(Gather.class).value();
|
||||
String gatherer = null;
|
||||
if (argumentSource.field.isAnnotationPresent(Gather.class)) {
|
||||
Gather gather = argumentSource.field.getAnnotation(Gather.class);
|
||||
if(! "".equals(gather.className()))
|
||||
gatherer = gather.className();
|
||||
else
|
||||
gatherer = gather.value().getName();
|
||||
}
|
||||
for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions())
|
||||
argumentFields.addAll(getArgumentFields(argumentDefinition, gatherer));
|
||||
}
|
||||
|
|
@ -130,7 +134,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals");
|
||||
|
||||
private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, Class<?> gatherer) {
|
||||
private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) {
|
||||
if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
|
||||
return Arrays.asList(
|
||||
new IntervalFileArgumentField(argumentDefinition),
|
||||
|
|
@ -154,7 +158,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
String gatherClass;
|
||||
if (gatherer != null)
|
||||
gatherClass = gatherer.getName();
|
||||
gatherClass = gatherer;
|
||||
else if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType))
|
||||
gatherClass = "BamGatherFunction";
|
||||
else if (VariantContextWriter.class.isAssignableFrom(argumentDefinition.argumentType))
|
||||
|
|
|
|||
|
|
@ -196,6 +196,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
"org.broadinstitute.sting.tools",
|
||||
"org.broadinstitute.sting.gatk",
|
||||
"org.broadinstitute.sting.pipeline",
|
||||
"org.broadinstitute.sting.tools",
|
||||
"org.broadinstitute.sting.gatk.datasources.reads.utilities");
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, JavaCommandLineFunction}
|
||||
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
|
||||
import org.broadinstitute.sting.queue.util.ClassFieldCache
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: ami
|
||||
* Date: 12/11/12
|
||||
* Time: 2:04 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class CatVariantsGatherer extends CatVariants with GatherFunction with RetryMemoryLimit{
|
||||
this.assumeSorted = true
|
||||
|
||||
private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
|
||||
override def freezeFieldValues() {
|
||||
this.reference = originalGATK.reference_sequence
|
||||
this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
|
||||
this.outputFile = this.originalOutput
|
||||
super.freezeFieldValues()
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue