add fast gather to UG; change UG to work with log-lessHMM (work in prograss)

This commit is contained in:
Ami Levy-Moonshine 2012-12-20 14:58:57 -05:00
parent 6bf31065e3
commit 6590039bc3
8 changed files with 27 additions and 16 deletions

View File

@ -27,18 +27,17 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.pairhmm.*; import org.broadinstitute.sting.utils.pairhmm.*;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.*; import java.util.*;
public class LikelihoodCalculationEngine { public class LikelihoodCalculationEngine {
@ -98,12 +97,12 @@ public class LikelihoodCalculationEngine {
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) { for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
//if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); } //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
// evaluate the likelihood of the reads given those haplotypes // evaluate the likelihood of the reads given those haplotypes
stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue(), sampleEntry.getKey())); stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue()));
} }
return stratifiedReadMap; return stratifiedReadMap;
} }
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample ) { private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads) {
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap(); final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
final int numHaplotypes = haplotypes.size(); final int numHaplotypes = haplotypes.size();

View File

@ -195,7 +195,7 @@ public abstract class CommandLineProgram {
clp.setupLoggerLevel(layout); clp.setupLoggerLevel(layout);
Class[] argumentSources = clp.getArgumentSources(); Class[] argumentSources = clp.getArgumentSources();
for (Class argumentSource : argumentSources) for (Class argumentSource : argumentSources)
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
parsedArgs = parser.parse(args); parsedArgs = parser.parse(args);

View File

@ -35,5 +35,6 @@ import java.lang.annotation.*;
@Target({ElementType.FIELD}) @Target({ElementType.FIELD})
public @interface Gather { public @interface Gather {
Class value() default Gather.class; Class value() default Gather.class;
String className() default "";
boolean enabled() default true; boolean enabled() default true;
} }

View File

@ -45,6 +45,7 @@ import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -157,9 +158,11 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
/** /**
* A raw, unfiltered, highly sensitive callset in VCF format. * A raw, unfiltered, highly sensitive callset in VCF format.
*/ */
@Gather(className = "org.broadinstitute.sting.queue.extensions.gatk.CatVariantsGatherer") //TODO: check this gatherer
@Output(doc="File to which variants should be written",required=true) @Output(doc="File to which variants should be written",required=true)
protected VariantContextWriter writer = null; protected VariantContextWriter writer = null;
@Hidden @Hidden
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false) @Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
protected PrintStream verboseWriter = null; protected PrintStream verboseWriter = null;

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pairhmm.ExactPairHMM; import org.broadinstitute.sting.utils.pairhmm.ExactPairHMM;
import org.broadinstitute.sting.utils.pairhmm.LoglessCachingPairHMM;
import org.broadinstitute.sting.utils.pairhmm.OriginalPairHMM; import org.broadinstitute.sting.utils.pairhmm.OriginalPairHMM;
import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.pairhmm.PairHMM;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -101,6 +102,8 @@ public class PairHMMIndelErrorModel {
break; break;
case CACHING: case CACHING:
case LOGLESS_CACHING: case LOGLESS_CACHING:
pairHMM = new LoglessCachingPairHMM();
break;
default: default:
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL and EXACT."); throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL and EXACT.");
} }
@ -371,7 +374,7 @@ public class PairHMMIndelErrorModel {
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals, readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotypeBases, readBases, readQuals,
(read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
(read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
contextLogGapContinuationProbabilities, startIndexInHaplotype, false); contextLogGapContinuationProbabilities, startIndexInHaplotype, previousHaplotypeSeen == null);
if (DEBUG) { if (DEBUG) {

View File

@ -204,7 +204,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
if ( PRIORITY_STRING == null && genotypeMergeOption == null) { if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED; genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12) //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
logger.info("Priority string not provided, using arbitrary genotyping order: "+priority); logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority);
} }
samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption); samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);

View File

@ -23,7 +23,6 @@
*/ */
package org.broadinstitute.sting.queue.extensions.gatk; package org.broadinstitute.sting.queue.extensions.gatk;
import net.sf.samtools.BAMIndex; import net.sf.samtools.BAMIndex;
import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriter;
import org.broad.tribble.Tribble; import org.broad.tribble.Tribble;
@ -119,9 +118,14 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>(); List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType)) for (ArgumentSource argumentSource: parsingEngine.extractArgumentSources(classType))
if (!argumentSource.isDeprecated()) { if (!argumentSource.isDeprecated()) {
Class<?> gatherer = null; String gatherer = null;
if (argumentSource.field.isAnnotationPresent(Gather.class)) if (argumentSource.field.isAnnotationPresent(Gather.class)) {
gatherer = argumentSource.field.getAnnotation(Gather.class).value(); Gather gather = argumentSource.field.getAnnotation(Gather.class);
if(! "".equals(gather.className()))
gatherer = gather.className();
else
gatherer = gather.value().getName();
}
for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions()) for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions())
argumentFields.addAll(getArgumentFields(argumentDefinition, gatherer)); argumentFields.addAll(getArgumentFields(argumentDefinition, gatherer));
} }
@ -130,7 +134,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals"); private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals");
private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, Class<?> gatherer) { private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition, String gatherer) {
if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Arrays.asList( return Arrays.asList(
new IntervalFileArgumentField(argumentDefinition), new IntervalFileArgumentField(argumentDefinition),
@ -154,7 +158,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
String gatherClass; String gatherClass;
if (gatherer != null) if (gatherer != null)
gatherClass = gatherer.getName(); gatherClass = gatherer;
else if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) else if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType))
gatherClass = "BamGatherFunction"; gatherClass = "BamGatherFunction";
else if (VariantContextWriter.class.isAssignableFrom(argumentDefinition.argumentType)) else if (VariantContextWriter.class.isAssignableFrom(argumentDefinition.argumentType))

View File

@ -195,6 +195,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
private static final List<String> gatkPackages = Arrays.asList( private static final List<String> gatkPackages = Arrays.asList(
"org.broadinstitute.sting.gatk", "org.broadinstitute.sting.gatk",
"org.broadinstitute.sting.pipeline", "org.broadinstitute.sting.pipeline",
"org.broadinstitute.sting.tools",
"org.broadinstitute.sting.gatk.datasources.reads.utilities"); "org.broadinstitute.sting.gatk.datasources.reads.utilities");
/** /**