From 6590039bc3ed0b7e4d01619b5250aa54de67fb58 Mon Sep 17 00:00:00 2001 From: Ami Levy-Moonshine Date: Thu, 20 Dec 2012 14:58:57 -0500 Subject: [PATCH 1/5] add fast gather to UG; change UG to work with log-lessHMM (work in prograss) --- .../LikelihoodCalculationEngine.java | 13 ++++++------- .../sting/commandline/CommandLineProgram.java | 2 +- .../broadinstitute/sting/commandline/Gather.java | 1 + .../gatk/walkers/genotyper/UnifiedGenotyper.java | 3 +++ .../walkers/indels/PairHMMIndelErrorModel.java | 5 ++++- .../walkers/variantutils/CombineVariants.java | 2 +- .../extensions/gatk/ArgumentDefinitionField.java | 16 ++++++++++------ .../extensions/gatk/GATKExtensionsGenerator.java | 1 + 8 files changed, 27 insertions(+), 16 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 018102893..3d05bbd1b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -27,18 +27,17 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.pairhmm.*; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.PrintStream; import java.util.*; public class LikelihoodCalculationEngine { @@ -98,12 +97,12 @@ public class LikelihoodCalculationEngine { for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); } // evaluate the likelihood of the reads given those haplotypes - stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue(), sampleEntry.getKey())); + stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue())); } return stratifiedReadMap; } - private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList haplotypes, final ArrayList reads, final String sample ) { + private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList haplotypes, final ArrayList reads) { final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap(); final int numHaplotypes = haplotypes.size(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index fb15a3722..36603f5c3 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -195,7 +195,7 @@ public abstract class CommandLineProgram { clp.setupLoggerLevel(layout); Class[] argumentSources = clp.getArgumentSources(); - for (Class argumentSource : argumentSources) + for (Class argumentSource : argumentSources) parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); parsedArgs = parser.parse(args); diff --git a/public/java/src/org/broadinstitute/sting/commandline/Gather.java b/public/java/src/org/broadinstitute/sting/commandline/Gather.java index d452f708e..62ef33421 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/Gather.java +++ b/public/java/src/org/broadinstitute/sting/commandline/Gather.java @@ -35,5 +35,6 @@ import java.lang.annotation.*; @Target({ElementType.FIELD}) public @interface Gather { Class value() default Gather.class; + String className() default ""; boolean enabled() default true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 36be2e7c6..08c854d2a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -45,6 +45,7 @@ import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -157,9 +158,11 @@ public class UnifiedGenotyper extends LocusWalker, Unif /** * A raw, unfiltered, highly sensitive callset in VCF format. */ + @Gather(className = "org.broadinstitute.sting.queue.extensions.gatk.CatVariantsGatherer") //TODO: check this gatherer @Output(doc="File to which variants should be written",required=true) protected VariantContextWriter writer = null; + @Hidden @Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false) protected PrintStream verboseWriter = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 848aaf8a3..737c24fb7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pairhmm.ExactPairHMM; +import org.broadinstitute.sting.utils.pairhmm.LoglessCachingPairHMM; import org.broadinstitute.sting.utils.pairhmm.OriginalPairHMM; import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -101,6 +102,8 @@ public class PairHMMIndelErrorModel { break; case CACHING: case LOGLESS_CACHING: + pairHMM = new LoglessCachingPairHMM(); + break; default: throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL and EXACT."); } @@ -371,7 +374,7 @@ public class PairHMMIndelErrorModel { readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), - contextLogGapContinuationProbabilities, startIndexInHaplotype, false); + contextLogGapContinuationProbabilities, startIndexInHaplotype, previousHaplotypeSeen == null); if (DEBUG) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 68fac7631..8c3271d6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -204,7 +204,7 @@ public class CombineVariants extends RodWalker implements Tree if ( PRIORITY_STRING == null && genotypeMergeOption == null) { genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED; //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12) - logger.info("Priority string not provided, using arbitrary genotyping order: "+priority); + logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority); } samples = sitesOnlyVCF ? Collections.emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption); diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index 2226c6458..a0e3bced3 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -23,7 +23,6 @@ */ package org.broadinstitute.sting.queue.extensions.gatk; - import net.sf.samtools.BAMIndex; import net.sf.samtools.SAMFileWriter; import org.broad.tribble.Tribble; @@ -119,9 +118,14 @@ public abstract class ArgumentDefinitionField extends ArgumentField { List argumentFields = new ArrayList(); for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType)) if (!argumentSource.isDeprecated()) { - Class gatherer = null; - if (argumentSource.field.isAnnotationPresent(Gather.class)) - gatherer = argumentSource.field.getAnnotation(Gather.class).value(); + String gatherer = null; + if (argumentSource.field.isAnnotationPresent(Gather.class)) { + Gather gather = argumentSource.field.getAnnotation(Gather.class); + if(! "".equals(gather.className())) + gatherer = gather.className(); + else + gatherer = gather.value().getName(); + } for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions()) argumentFields.addAll(getArgumentFields(argumentDefinition, gatherer)); } @@ -130,7 +134,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { private static final List intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals"); - private static List getArgumentFields(ArgumentDefinition argumentDefinition, Class gatherer) { + private static List getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) { if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { return Arrays.asList( new IntervalFileArgumentField(argumentDefinition), @@ -154,7 +158,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { String gatherClass; if (gatherer != null) - gatherClass = gatherer.getName(); + gatherClass = gatherer; else if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) gatherClass = "BamGatherFunction"; else if (VariantContextWriter.class.isAssignableFrom(argumentDefinition.argumentType)) diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 883436582..103a71136 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -195,6 +195,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { private static final List gatkPackages = Arrays.asList( "org.broadinstitute.sting.gatk", "org.broadinstitute.sting.pipeline", + "org.broadinstitute.sting.tools", "org.broadinstitute.sting.gatk.datasources.reads.utilities"); /** From 3ca3fd4b3e83922c4c0959d650119597e29b5702 Mon Sep 17 00:00:00 2001 From: Ami Levy-Moonshine Date: Fri, 21 Dec 2012 11:06:12 -0500 Subject: [PATCH 2/5] keep working on loglessHMM in UG --- .../sting/gatk/walkers/indels/PairHMMIndelErrorModel.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 737c24fb7..b45b396e2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -335,6 +335,7 @@ public class PairHMMIndelErrorModel { getContextHomopolymerLength(readBases,hrunProfile); fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities); + boolean firstHap = true; for (Allele a: haplotypeMap.keySet()) { Haplotype haplotype = haplotypeMap.get(a); @@ -374,7 +375,7 @@ public class PairHMMIndelErrorModel { readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), - contextLogGapContinuationProbabilities, startIndexInHaplotype, previousHaplotypeSeen == null); + contextLogGapContinuationProbabilities, startIndexInHaplotype, firstHap); if (DEBUG) { @@ -386,6 +387,7 @@ public class PairHMMIndelErrorModel { perReadAlleleLikelihoodMap.add(p, a, readLikelihood); readLikelihoods[readIdx][j++] = readLikelihood; + firstHap = false; } } } From fe427cdd77eac97044792d9fcd415c6f3f4b2b7b Mon Sep 17 00:00:00 2001 From: Ami Levy-Moonshine Date: Wed, 26 Dec 2012 13:06:36 -0500 Subject: [PATCH 3/5] add few queue script and the CatVariantsGatherer scala class --- .../extensions/gatk/CatVariantsGatherer.scala | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala new file mode 100644 index 000000000..0a89a4227 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/CatVariantsGatherer.scala @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, JavaCommandLineFunction} +import org.broadinstitute.sting.queue.function.scattergather.GatherFunction +import org.broadinstitute.sting.queue.util.ClassFieldCache +import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor + +/** + * Created with IntelliJ IDEA. + * User: ami + * Date: 12/11/12 + * Time: 2:04 PM + * To change this template use File | Settings | File Templates. + */ +class CatVariantsGatherer extends CatVariants with GatherFunction with RetryMemoryLimit{ + this.assumeSorted = true + + private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] + + override def freezeFieldValues() { + this.reference = originalGATK.reference_sequence + this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } + this.outputFile = this.originalOutput + super.freezeFieldValues() + } + + + +} From 2018285a39a151f2d64bed565c5ab0ac11fde191 Mon Sep 17 00:00:00 2001 From: Ami Levy-Moonshine Date: Thu, 3 Jan 2013 13:41:03 -0500 Subject: [PATCH 5/5] better error message --- .../sting/gatk/walkers/indels/PairHMMIndelErrorModel.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index b45b396e2..5f6ed561d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -103,9 +103,10 @@ public class PairHMMIndelErrorModel { case CACHING: case LOGLESS_CACHING: pairHMM = new LoglessCachingPairHMM(); + System.err.println("warning: this option (LOGLESS_CACHING in UG) is still under development"); break; default: - throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL and EXACT."); + throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING (the third option is still under development)."); } // fill gap penalty table, affine naive model: