diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index e1a1ed6d2..1be484075 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -28,7 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pairhmm.*; @@ -95,12 +97,12 @@ public class LikelihoodCalculationEngine { for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); } // evaluate the likelihood of the reads given those haplotypes - stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue(), sampleEntry.getKey())); + stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue())); } return stratifiedReadMap; } - private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList haplotypes, final ArrayList reads, final String sample ) { + private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList haplotypes, final ArrayList reads) { final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap(); final int numHaplotypes = haplotypes.size(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index fb15a3722..36603f5c3 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -195,7 +195,7 @@ public abstract class CommandLineProgram { clp.setupLoggerLevel(layout); Class[] argumentSources = clp.getArgumentSources(); - for (Class argumentSource : argumentSources) + for (Class argumentSource : argumentSources) parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); parsedArgs = parser.parse(args); diff --git a/public/java/src/org/broadinstitute/sting/commandline/Gather.java b/public/java/src/org/broadinstitute/sting/commandline/Gather.java index d452f708e..62ef33421 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/Gather.java +++ b/public/java/src/org/broadinstitute/sting/commandline/Gather.java @@ -35,5 +35,6 @@ import java.lang.annotation.*; @Target({ElementType.FIELD}) public @interface Gather { Class value() default Gather.class; + String className() default ""; boolean enabled() default true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoveredByNSamplesSites.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoveredByNSamplesSites.java deleted file mode 100644 index ea31a58f1..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoveredByNSamplesSites.java +++ /dev/null @@ -1,94 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.coverage; - -/** - * Created with IntelliJ IDEA. - * User: ami - * Date: 1/3/13 - * Time: 1:51 PM - * To change this template use File | Settings | File Templates. - */ - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.variant.variantcontext.Genotype; -import org.broadinstitute.variant.variantcontext.GenotypesContext; -import org.broadinstitute.variant.variantcontext.VariantContext; - -import java.io.File; -import java.io.PrintStream; -import java.util.Collection; -import java.util.Iterator; -import java.util.Map; - -@By(DataSource.REFERENCE_ORDERED_DATA) -public class CoveredByNSamplesSites extends RodWalker implements TreeReducible { - @Output - PrintStream out; - - @Output(fullName = "OutputIntervals", shortName = "intervals", doc = "Name of file for output intervals", required = true) - File intervalFile; - - @ArgumentCollection - protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); - - - @Argument(fullName = "minCoverage", shortName = "minCov",doc = "only samples that have coverage higher then the min are taking into account", required = false) - int minCoverage = 10; - - @Argument(fullName = "samplePercentage", shortName = "percentage",doc = "only sites that have more than samplePercentage samples with minCoverage", required = false) - double samplePercentage = 0.9; - - @Override - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context){ - if(tracker == null) - return 0; - - Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation()); - if (vcs.size() == 0) - return 0; - - for (VariantContext vc : vcs){ - int countCoveredSamples = 0; - final GenotypesContext genotypes = vc.getGenotypes(); - final int numOfGenotype = genotypes.size(); - for (Genotype g : genotypes){ - if (g.getDP() > minCoverage) - countCoveredSamples++; - } - if (countCoveredSamples/numOfGenotype >= samplePercentage) - out.print(ref.getLocus()); - - } - - return 1; - } - - @Override - public Integer reduceInit() { return 0; } - - @Override - public Integer reduce(Integer value, Integer sum) { return value + sum; } - - @Override - public Integer treeReduce(Integer lhs, Integer rhs) { - return lhs + rhs; - } - - /** - * Tell the user the number of loci processed and close out the new variants file. - * - * @param result the number of loci seen. - */ - public void onTraversalDone(Integer result) { - logger.info("Processed " + result + " loci.\n"); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 7b82403b5..b5386ff6b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -157,6 +157,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif /** * A raw, unfiltered, highly sensitive callset in VCF format. */ + @Gather(className = "org.broadinstitute.sting.queue.extensions.gatk.CatVariantsGatherer") @Output(doc="File to which variants should be written",required=true) protected VariantContextWriter writer = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 15ced4f0b..ab498ed7e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pairhmm.ExactPairHMM; +import org.broadinstitute.sting.utils.pairhmm.LoglessCachingPairHMM; import org.broadinstitute.sting.utils.pairhmm.OriginalPairHMM; import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -101,8 +102,11 @@ public class PairHMMIndelErrorModel { break; case CACHING: case LOGLESS_CACHING: + pairHMM = new LoglessCachingPairHMM(); + System.err.println("warning: this option (LOGLESS_CACHING in UG) is still under development"); + break; default: - throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL and EXACT."); + throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING (the third option is still under development)."); } // fill gap penalty table, affine naive model: @@ -332,6 +336,7 @@ public class PairHMMIndelErrorModel { getContextHomopolymerLength(readBases,hrunProfile); fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities); + boolean firstHap = true; for (Allele a: haplotypeMap.keySet()) { Haplotype haplotype = haplotypeMap.get(a); @@ -371,7 +376,7 @@ public class PairHMMIndelErrorModel { readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), - contextLogGapContinuationProbabilities, startIndexInHaplotype, false); + contextLogGapContinuationProbabilities, startIndexInHaplotype, firstHap); if (DEBUG) { @@ -383,6 +388,7 @@ public class PairHMMIndelErrorModel { perReadAlleleLikelihoodMap.add(p, a, readLikelihood); readLikelihoods[readIdx][j++] = readLikelihood; + firstHap = false; } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index eb671ff4a..78b89d615 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -205,7 +205,7 @@ public class CombineVariants extends RodWalker implements Tree if ( PRIORITY_STRING == null && genotypeMergeOption == null) { genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED; //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12) - logger.info("Priority string not provided, using arbitrary genotyping order: "+priority); + logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority); } samples = sitesOnlyVCF ? Collections.emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption); diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index 597a916f1..89605863b 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -23,7 +23,6 @@ */ package org.broadinstitute.sting.queue.extensions.gatk; - import net.sf.samtools.BAMIndex; import net.sf.samtools.SAMFileWriter; import org.broad.tribble.Tribble; @@ -119,9 +118,14 @@ public abstract class ArgumentDefinitionField extends ArgumentField { List argumentFields = new ArrayList(); for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType)) if (!argumentSource.isDeprecated()) { - Class gatherer = null; - if (argumentSource.field.isAnnotationPresent(Gather.class)) - gatherer = argumentSource.field.getAnnotation(Gather.class).value(); + String gatherer = null; + if (argumentSource.field.isAnnotationPresent(Gather.class)) { + Gather gather = argumentSource.field.getAnnotation(Gather.class); + if(! "".equals(gather.className())) + gatherer = gather.className(); + else + gatherer = gather.value().getName(); + } for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions()) argumentFields.addAll(getArgumentFields(argumentDefinition, gatherer)); } @@ -130,7 +134,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { private static final List intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals"); - private static List getArgumentFields(ArgumentDefinition argumentDefinition, Class gatherer) { + private static List getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) { if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { return Arrays.asList( new IntervalFileArgumentField(argumentDefinition), @@ -154,7 +158,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { String gatherClass; if (gatherer != null) - gatherClass = gatherer.getName(); + gatherClass = gatherer; else if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) gatherClass = "BamGatherFunction"; else if (VariantContextWriter.class.isAssignableFrom(argumentDefinition.argumentType)) diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 060420e3b..bcdeb4d88 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -196,6 +196,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { "org.broadinstitute.sting.tools", "org.broadinstitute.sting.gatk", "org.broadinstitute.sting.pipeline", + "org.broadinstitute.sting.tools", "org.broadinstitute.sting.gatk.datasources.reads.utilities"); /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala new file mode 100644 index 000000000..0a89a4227 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, JavaCommandLineFunction} +import org.broadinstitute.sting.queue.function.scattergather.GatherFunction +import org.broadinstitute.sting.queue.util.ClassFieldCache +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor + +/** + * Created with IntelliJ IDEA. + * User: ami + * Date: 12/11/12 + * Time: 2:04 PM + * To change this template use File | Settings | File Templates. + */ +class CatVariantsGatherer extends CatVariants with GatherFunction with RetryMemoryLimit{ + this.assumeSorted = true + + private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] + + override def freezeFieldValues() { + this.reference = originalGATK.reference_sequence + this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } + this.outputFile = this.originalOutput + super.freezeFieldValues() + } + + + +}