diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index c6b80c308..728a6b6df 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.VariantContext; @@ -65,7 +63,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo model = new HaplotypeIndelErrorModel(maxReadDeletionLength, UAC.INSERTION_START_PROBABILITY, UAC.INSERTION_END_PROBABILITY, UAC.ALPHA_DELETION_PROBABILITY, HAPLOTYPE_SIZE, false, DEBUGOUT); alleleList = new ArrayList(); - getAlleleListFromVCF = UAC.GET_ALLELES_FROM_VCF; + getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING; } @@ -260,7 +258,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo if (getAlleleListFromVCF) { - for( final VariantContext vc_input : tracker.getVariantContexts(ref, "indels", null, ref.getLocus(), false, false) ) { + for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", null, ref.getLocus(), false, false) ) { if( vc_input != null && vc_input.isIndel() && ref.getLocus().getStart() == vc_input.getStart()) { vc = vc_input; break; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index f131cf75d..1127f8d05 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -46,6 +46,11 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { DINDEL } + public enum GENOTYPING_MODE { + DISCOVERY, + GENOTYPE_GIVEN_ALLELES + } + protected UnifiedArgumentCollection UAC; protected Logger logger; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 329769ac7..cc86a75ed 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; @@ -44,8 +45,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC // the alternate allele with the largest sum of quality scores protected Byte bestAlternateAllele = null; + private final boolean useAlleleFromVCF; + protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); + useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; } public Allele getLikelihoods(RefMetaDataTracker tracker, @@ -63,18 +67,29 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC Allele refAllele = Allele.create(refBase, true); // find the alternate allele with the largest sum of quality scores - if ( alternateAlleleToUse == null ) - initializeBestAlternateAllele(refBase, contexts); - else + if ( alternateAlleleToUse != null ) { bestAlternateAllele = alternateAlleleToUse.getBases()[0]; + } else if ( useAlleleFromVCF ) { + final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), true); + if ( vcInput == null ) + return null; + if ( !vcInput.isBiallelic() ) { + logger.info("Record at position " + ref.getLocus() + " is not bi-allelic; skipping..."); + return null; + } + if ( !vcInput.isSNP() ) { + logger.info("Record at position " + ref.getLocus() + " is not a SNP; skipping..."); + return null; + } + bestAlternateAllele = vcInput.getAlternateAllele(0).getBases()[0]; + } else { + initializeBestAlternateAllele(refBase, contexts); + } // if there are no non-ref bases... if ( bestAlternateAllele == null ) { - // did we trigger on the provided track? - boolean atTriggerTrack = tracker.getReferenceMetaData(UnifiedGenotyperEngine.TRIGGER_TRACK_NAME, false).size() > 0; - - // if we don't want all bases, then we don't need to calculate genotype likelihoods - if ( !atTriggerTrack && !UAC.ALL_BASES_MODE && !UAC.GENOTYPE_MODE ) + // if we only want variants, then we don't need to calculate genotype likelihoods + if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) return refAllele; // otherwise, choose any alternate allele (it doesn't really matter) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 4d94f041a..aae84fbba 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -83,8 +83,7 @@ public class UGCallVariants extends RodWalker { headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); - if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING || - UAC.TRIGGER_CONFIDENCE_FOR_EMITTING < UAC.TRIGGER_CONFIDENCE_FOR_CALLING ) + if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ) headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality")); // initialize the header diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 0514d2ad8..6bcef12e3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -44,11 +44,11 @@ public class UnifiedArgumentCollection { @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false) public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE; - @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) - public boolean GENOTYPE_MODE = false; + @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) + public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; - @Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output all callable bases?", required = false) - public boolean ALL_BASES_MODE = false; + @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) + public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false) public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0; @@ -56,12 +56,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false) public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0; - @Argument(fullName = "trigger_min_confidence_threshold_for_calling", shortName = "trig_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be called", required = false) - public double TRIGGER_CONFIDENCE_FOR_CALLING = 30.0; - - @Argument(fullName = "trigger_min_confidence_threshold_for_emitting", shortName = "trig_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false) - public double TRIGGER_CONFIDENCE_FOR_EMITTING = 30.0; - @Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false) public boolean NO_SLOD = false; @@ -90,9 +84,6 @@ public class UnifiedArgumentCollection { // indel-related arguments - @Argument(fullName = "get_indel_alleles_from_vcf", shortName = "getIndelAllelesFromVCF", doc = "Get reference/alt alleles for indel genotyping from VCF", required = false) - public boolean GET_ALLELES_FROM_VCF = false; - @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false) public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5; @@ -101,32 +92,39 @@ public class UnifiedArgumentCollection { @Argument(fullName = "insertionStartProbability", shortName = "insertionStartProbability", doc = "Heterozygosity for indel calling", required = false) public double INSERTION_START_PROBABILITY = 1e-3; + @Argument(fullName = "insertionEndProbability", shortName = "insertionEndProbability", doc = "Heterozygosity for indel calling", required = false) public double INSERTION_END_PROBABILITY = 0.5; + @Argument(fullName = "alphaDeletionProbability", shortName = "alphaDeletionProbability", doc = "Heterozygosity for indel calling", required = false) public double ALPHA_DELETION_PROBABILITY = 1e-3; - + @Deprecated + @Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false) + private Boolean ALL_BASES_DEPRECATED = false; + + @Deprecated + @Argument(fullName="genotype", shortName="genotype", doc="Please use --output_mode EMIT_ALL_CONFIDENT_SITES instead" ,required=false) + private Boolean GENOTYPE_DEPRECATED = false; + + public UnifiedArgumentCollection clone() { UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.GLmodel = GLmodel; uac.heterozygosity = heterozygosity; uac.PCR_error = PCR_error; - uac.GENOTYPE_MODE = GENOTYPE_MODE; - uac.ALL_BASES_MODE = ALL_BASES_MODE; + uac.GenotypingMode = GenotypingMode; + uac.OutputMode = OutputMode; uac.NO_SLOD = NO_SLOD; uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE; uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING; uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING; - uac.TRIGGER_CONFIDENCE_FOR_CALLING = TRIGGER_CONFIDENCE_FOR_CALLING; - uac.TRIGGER_CONFIDENCE_FOR_EMITTING = TRIGGER_CONFIDENCE_FOR_EMITTING; uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE; uac.MIN_MAPPING_QUALTY_SCORE = MIN_MAPPING_QUALTY_SCORE; uac.MAX_MISMATCHES = MAX_MISMATCHES; uac.USE_BADLY_MATED_READS = USE_BADLY_MATED_READS; uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION; - uac.GET_ALLELES_FROM_VCF = GET_ALLELES_FROM_VCF; uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING; uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY; uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 1aae82eda..1b362f9eb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -160,8 +160,7 @@ public class UnifiedGenotyper extends LocusWalker GLs = new HashMap(); + Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs, alternateAlleleToUse); if (refAllele != null) @@ -289,7 +295,7 @@ public class UnifiedGenotyperEngine { double PofF = Math.min(sum, 1.0); // deal with precision errors double phredScaledConfidence; - if ( bestAFguess != 0 ) { + if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; @@ -306,11 +312,8 @@ public class UnifiedGenotyperEngine { } } - // did we trigger on the provided track? - boolean atTriggerTrack = tracker.getReferenceMetaData(TRIGGER_TRACK_NAME, false).size() > 0; - // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero - if ( !UAC.ALL_BASES_MODE && !passesEmitThreshold(phredScaledConfidence, bestAFguess, atTriggerTrack) ) { + if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { // technically, at this point our confidence in a reference call isn't accurately estimated // because it didn't take into account samples with no data, so let's get a better estimate return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), true, 1.0 - PofF); @@ -380,12 +383,12 @@ public class UnifiedGenotyperEngine { Set myAlleles = vc.getAlleles(); // strip out the alternate allele if it's a ref call - if ( bestAFguess == 0 ) { + if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { myAlleles = new HashSet(1); myAlleles.add(vc.getReference()); } VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc, - myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes); + myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes); if ( annotationEngine != null ) { // first off, we want to use the *unfiltered* context for the annotations @@ -400,7 +403,7 @@ public class UnifiedGenotyperEngine { vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element. } - VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence, atTriggerTrack)); + VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence)); call.setRefBase(refContext.getBase()); return call; } @@ -440,7 +443,7 @@ public class UnifiedGenotyperEngine { ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE); // don't call when there is no coverage - if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE ) + if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ) return null; // stratify the AlignmentContext and cut by sample @@ -561,7 +564,7 @@ public class UnifiedGenotyperEngine { // now, test for bad pileups // in all_bases mode, it doesn't matter - if ( UAC.ALL_BASES_MODE ) + if ( UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES ) return true; // is there no coverage? @@ -620,16 +623,12 @@ public class UnifiedGenotyperEngine { } } - protected boolean passesEmitThreshold(double conf, int bestAFguess, boolean atTriggerTrack) { - return (atTriggerTrack ? - (conf >= Math.min(UAC.TRIGGER_CONFIDENCE_FOR_CALLING, UAC.TRIGGER_CONFIDENCE_FOR_EMITTING)) : - ((UAC.GENOTYPE_MODE || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING))); + protected boolean passesEmitThreshold(double conf, int bestAFguess) { + return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); } - protected boolean passesCallThreshold(double conf, boolean atTriggerTrack) { - return (atTriggerTrack ? - (conf >= UAC.TRIGGER_CONFIDENCE_FOR_CALLING) : - (conf >= UAC.STANDARD_CONFIDENCE_FOR_CALLING)); + protected boolean passesCallThreshold(double conf) { + return conf >= UAC.STANDARD_CONFIDENCE_FOR_CALLING; } protected void computeAlleleFrequencyPriors(int N) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java index 58632c708..71d8bd1ed 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java @@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; @@ -80,7 +79,7 @@ public class LocusMismatchWalker extends LocusWalker implements public void initialize() { UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); - uac.ALL_BASES_MODE = true; + uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; ug = new UnifiedGenotyperEngine(getToolkit(), uac); // print the header diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java index 2144ad81f..58d2c4646 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java @@ -68,7 +68,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker, Strin public void initialize() { UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.STANDARD_CONFIDENCE_FOR_CALLING = uac.STANDARD_CONFIDENCE_FOR_EMITTING = confidence; - uac.ALL_BASES_MODE = true; + uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; UG = new UnifiedGenotyperEngine(getToolkit(), uac); out.println("#locus\tid\tdownsampled_coverage\tpct_coverage\titeration\tref\teval_call\tcomp_call\tvariant_concordance\tgenotype_concordance"); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java index 9b71c8d5c..37a522dc1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java @@ -120,7 +120,7 @@ public class ValidationGenotyper extends LocusWalker result = executeTest("test MultiSample Pilot2", spec1).getFirst(); + + WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( + baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1, + Arrays.asList(md5)); + executeTest("test MultiSample Pilot2 with alleles passed in", spec2); + } + + @Test + public void testWithAllelesPassedIn() { + WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( + baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, + Arrays.asList("e95c545b8ae06f0721f260125cfbe1f0")); + executeTest("test MultiSample Pilot2 with alleles passed in", spec1); + + WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( + baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, + Arrays.asList("6c96d76b9bc3aade0c768d7c657ae210")); + executeTest("test MultiSample Pilot2 with alleles passed in", spec2); } @Test @@ -42,7 +64,7 @@ public class WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, Arrays.asList("8da08fe12bc0d95e548fe63681997038")); - executeTest("testSingleSamplePilot2", spec); + executeTest("test SingleSample Pilot2", spec); } // -------------------------------------------------------------------------------------------------------------- @@ -58,7 +80,7 @@ public class WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, Arrays.asList("gz"), Arrays.asList(COMPRESSED_OUTPUT_MD5)); - executeTest("testCompressedOutput", spec); + executeTest("test compressed output", spec); } // todo -- fixme @@ -103,16 +125,28 @@ public class // -------------------------------------------------------------------------------------------------------------- @Test - public void testParameter() { + public void testCallingParameters() { HashMap e = new HashMap(); - e.put( "-genotype", "4ffcb1e1f20ce175783c32c30deef8db" ); - e.put( "-sites_only", "71e561ba6fc66bd8b84907252f71ea55" ); - e.put( "-all_bases", "3d98205a31a133c11e518e095dc7ab65" ); e.put( "--min_base_quality_score 26", "5f1cfb9c7f82e6414d5db7aa344813ac" ); e.put( "--min_mapping_quality_score 26", "6c3ad441f3a23ade292549b1dea80932" ); e.put( "--max_mismatches_in_40bp_window 5", "5ecaf4281410b67e8e2e164f2ea0d58a" ); e.put( "--p_nonref_model GRID_SEARCH", "17ffb56d078fdde335a79773e9534ce7" ); + for ( Map.Entry entry : e.entrySet() ) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1, + Arrays.asList(entry.getValue())); + executeTest(String.format("test calling parameter[%s]", entry.getKey()), spec); + } + } + + @Test + public void testOutputParameter() { + HashMap e = new HashMap(); + e.put( "-sites_only", "71e561ba6fc66bd8b84907252f71ea55" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "4ffcb1e1f20ce175783c32c30deef8db" ); + e.put( "--output_mode EMIT_ALL_SITES", "3d98205a31a133c11e518e095dc7ab65" ); + for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1, @@ -126,12 +160,12 @@ public class WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, Arrays.asList("17ffb56d078fdde335a79773e9534ce7")); - executeTest("testConfidence1", spec1); + executeTest("test confidence 1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, Arrays.asList("d49ec8c1476cecb8e3153894cc0f6662")); - executeTest("testConfidence2", spec2); + executeTest("test confidence 2", spec2); } // -------------------------------------------------------------------------------------------------------------- @@ -149,7 +183,7 @@ public class WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1, Arrays.asList(entry.getValue())); - executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); + executeTest(String.format("test heterozyosity[%s]", entry.getKey()), spec); } } @@ -168,6 +202,6 @@ public class 1, Arrays.asList("5974d8c21d27d014e2d0bed695b0b42e")); - executeTest(String.format("testMultiTechnologies"), spec); + executeTest(String.format("test multiple technologies"), spec); } } diff --git a/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala b/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala index 3e6379149..1cb4b82af 100755 --- a/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala +++ b/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala @@ -5,6 +5,7 @@ import org.broadinstitute.sting.queue.util._ import java.io.File import org.broadinstitute.sting.datasources.pipeline.Pipeline import org.broadinstitute.sting.gatk.DownsampleType +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.utils.yaml.YamlUtils import org.broadinstitute.sting.queue.function.CommandLineFunction @@ -72,8 +73,7 @@ class ProjectManagement(stingPath: String) { calc.scatterCount = if (bams.size < 5 ) 1 else if (bams.size < 50) 60 else 120 calc.min_base_quality_score = Some(22) calc.min_mapping_quality_score = Some(20) - calc.genotype = true - calc.output_all_callable_bases = true + //calc.genotyping_mode = Option[GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES] calc.out = outVCF calc.rodBind :+= new RodBind("allele","VCF",alleleVCF) calc.intervals :+= intervals @@ -97,4 +97,4 @@ class ProjectManagement(stingPath: String) { def swapExt(file: File, oldExtension: String, newExtension: String) = new File(file.getName.stripSuffix(oldExtension) + newExtension) -} \ No newline at end of file +}