Fixing up the output system of the Unified Genotyper. Deprecating the -all_bases and -genotype arguments. Adding instead the --output_mode (EMIT_VARIANTS_ONLY, EMIT_ALL_CONFIDENT_SITES, EMIT_ALL_SITES) and --genotyping_mode (DISCOVERY, GENOTYPE_GIVEN_ALLELES) arguments. UG now does the correct thing when passed alleles (bound to the 'alleles' rod) to use for genotyping; added several integration tests to cover this case. This commit will break the batched calls merging script, but Chris knows this and is ready for it...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5288 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-02-22 06:07:18 +00:00
parent 63f40215b3
commit 318035c147
12 changed files with 123 additions and 77 deletions

View File

@ -25,8 +25,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.VariantContext;
@ -65,7 +63,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
model = new HaplotypeIndelErrorModel(maxReadDeletionLength, UAC.INSERTION_START_PROBABILITY,
UAC.INSERTION_END_PROBABILITY, UAC.ALPHA_DELETION_PROBABILITY, HAPLOTYPE_SIZE, false, DEBUGOUT);
alleleList = new ArrayList<Allele>();
getAlleleListFromVCF = UAC.GET_ALLELES_FROM_VCF;
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
}
@ -260,7 +258,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
if (getAlleleListFromVCF) {
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "indels", null, ref.getLocus(), false, false) ) {
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", null, ref.getLocus(), false, false) ) {
if( vc_input != null && vc_input.isIndel() && ref.getLocus().getStart() == vc_input.getStart()) {
vc = vc_input;
break;

View File

@ -46,6 +46,11 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
DINDEL
}
public enum GENOTYPING_MODE {
DISCOVERY,
GENOTYPE_GIVEN_ALLELES
}
protected UnifiedArgumentCollection UAC;
protected Logger logger;

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
@ -44,8 +45,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
// the alternate allele with the largest sum of quality scores
protected Byte bestAlternateAllele = null;
private final boolean useAlleleFromVCF;
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
}
public Allele getLikelihoods(RefMetaDataTracker tracker,
@ -63,18 +67,29 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
Allele refAllele = Allele.create(refBase, true);
// find the alternate allele with the largest sum of quality scores
if ( alternateAlleleToUse == null )
initializeBestAlternateAllele(refBase, contexts);
else
if ( alternateAlleleToUse != null ) {
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
} else if ( useAlleleFromVCF ) {
final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), true);
if ( vcInput == null )
return null;
if ( !vcInput.isBiallelic() ) {
logger.info("Record at position " + ref.getLocus() + " is not bi-allelic; skipping...");
return null;
}
if ( !vcInput.isSNP() ) {
logger.info("Record at position " + ref.getLocus() + " is not a SNP; skipping...");
return null;
}
bestAlternateAllele = vcInput.getAlternateAllele(0).getBases()[0];
} else {
initializeBestAlternateAllele(refBase, contexts);
}
// if there are no non-ref bases...
if ( bestAlternateAllele == null ) {
// did we trigger on the provided track?
boolean atTriggerTrack = tracker.getReferenceMetaData(UnifiedGenotyperEngine.TRIGGER_TRACK_NAME, false).size() > 0;
// if we don't want all bases, then we don't need to calculate genotype likelihoods
if ( !atTriggerTrack && !UAC.ALL_BASES_MODE && !UAC.GENOTYPE_MODE )
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return refAllele;
// otherwise, choose any alternate allele (it doesn't really matter)

View File

@ -83,8 +83,7 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
UAC.TRIGGER_CONFIDENCE_FOR_EMITTING < UAC.TRIGGER_CONFIDENCE_FOR_CALLING )
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
// initialize the header

View File

@ -44,11 +44,11 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public boolean GENOTYPE_MODE = false;
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
@Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output all callable bases?", required = false)
public boolean ALL_BASES_MODE = false;
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
@ -56,12 +56,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
@Argument(fullName = "trigger_min_confidence_threshold_for_calling", shortName = "trig_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be called", required = false)
public double TRIGGER_CONFIDENCE_FOR_CALLING = 30.0;
@Argument(fullName = "trigger_min_confidence_threshold_for_emitting", shortName = "trig_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
public double TRIGGER_CONFIDENCE_FOR_EMITTING = 30.0;
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
@ -90,9 +84,6 @@ public class UnifiedArgumentCollection {
// indel-related arguments
@Argument(fullName = "get_indel_alleles_from_vcf", shortName = "getIndelAllelesFromVCF", doc = "Get reference/alt alleles for indel genotyping from VCF", required = false)
public boolean GET_ALLELES_FROM_VCF = false;
@Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
@ -101,32 +92,39 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "insertionStartProbability", shortName = "insertionStartProbability", doc = "Heterozygosity for indel calling", required = false)
public double INSERTION_START_PROBABILITY = 1e-3;
@Argument(fullName = "insertionEndProbability", shortName = "insertionEndProbability", doc = "Heterozygosity for indel calling", required = false)
public double INSERTION_END_PROBABILITY = 0.5;
@Argument(fullName = "alphaDeletionProbability", shortName = "alphaDeletionProbability", doc = "Heterozygosity for indel calling", required = false)
public double ALPHA_DELETION_PROBABILITY = 1e-3;
@Deprecated
@Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false)
private Boolean ALL_BASES_DEPRECATED = false;
@Deprecated
@Argument(fullName="genotype", shortName="genotype", doc="Please use --output_mode EMIT_ALL_CONFIDENT_SITES instead" ,required=false)
private Boolean GENOTYPE_DEPRECATED = false;
public UnifiedArgumentCollection clone() {
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.GLmodel = GLmodel;
uac.heterozygosity = heterozygosity;
uac.PCR_error = PCR_error;
uac.GENOTYPE_MODE = GENOTYPE_MODE;
uac.ALL_BASES_MODE = ALL_BASES_MODE;
uac.GenotypingMode = GenotypingMode;
uac.OutputMode = OutputMode;
uac.NO_SLOD = NO_SLOD;
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
uac.TRIGGER_CONFIDENCE_FOR_CALLING = TRIGGER_CONFIDENCE_FOR_CALLING;
uac.TRIGGER_CONFIDENCE_FOR_EMITTING = TRIGGER_CONFIDENCE_FOR_EMITTING;
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
uac.MIN_MAPPING_QUALTY_SCORE = MIN_MAPPING_QUALTY_SCORE;
uac.MAX_MISMATCHES = MAX_MISMATCHES;
uac.USE_BADLY_MATED_READS = USE_BADLY_MATED_READS;
uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
uac.GET_ALLELES_FROM_VCF = GET_ALLELES_FROM_VCF;
uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY;

View File

@ -160,8 +160,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY));
// FILTER fields
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
UAC.TRIGGER_CONFIDENCE_FOR_EMITTING < UAC.TRIGGER_CONFIDENCE_FOR_CALLING )
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
return headerInfo;

View File

@ -59,9 +59,14 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
public class UnifiedGenotyperEngine {
public static final String TRIGGER_TRACK_NAME = "trigger";
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
public enum OUTPUT_MODE {
EMIT_VARIANTS_ONLY,
EMIT_ALL_CONFIDENT_SITES,
EMIT_ALL_SITES
}
// the unified argument collection
private UnifiedArgumentCollection UAC = null;
@ -187,6 +192,7 @@ public class UnifiedGenotyperEngine {
}
Map<String, BiallelicGenotypeLikelihoods> GLs = new HashMap<String, BiallelicGenotypeLikelihoods>();
Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs, alternateAlleleToUse);
if (refAllele != null)
@ -289,7 +295,7 @@ public class UnifiedGenotyperEngine {
double PofF = Math.min(sum, 1.0); // deal with precision errors
double phredScaledConfidence;
if ( bestAFguess != 0 ) {
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
@ -306,11 +312,8 @@ public class UnifiedGenotyperEngine {
}
}
// did we trigger on the provided track?
boolean atTriggerTrack = tracker.getReferenceMetaData(TRIGGER_TRACK_NAME, false).size() > 0;
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
if ( !UAC.ALL_BASES_MODE && !passesEmitThreshold(phredScaledConfidence, bestAFguess, atTriggerTrack) ) {
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
// technically, at this point our confidence in a reference call isn't accurately estimated
// because it didn't take into account samples with no data, so let's get a better estimate
return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), true, 1.0 - PofF);
@ -380,12 +383,12 @@ public class UnifiedGenotyperEngine {
Set<Allele> myAlleles = vc.getAlleles();
// strip out the alternate allele if it's a ref call
if ( bestAFguess == 0 ) {
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new HashSet<Allele>(1);
myAlleles.add(vc.getReference());
}
VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
if ( annotationEngine != null ) {
// first off, we want to use the *unfiltered* context for the annotations
@ -400,7 +403,7 @@ public class UnifiedGenotyperEngine {
vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element.
}
VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence, atTriggerTrack));
VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence));
call.setRefBase(refContext.getBase());
return call;
}
@ -440,7 +443,7 @@ public class UnifiedGenotyperEngine {
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// don't call when there is no coverage
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
return null;
// stratify the AlignmentContext and cut by sample
@ -561,7 +564,7 @@ public class UnifiedGenotyperEngine {
// now, test for bad pileups
// in all_bases mode, it doesn't matter
if ( UAC.ALL_BASES_MODE )
if ( UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES )
return true;
// is there no coverage?
@ -620,16 +623,12 @@ public class UnifiedGenotyperEngine {
}
}
protected boolean passesEmitThreshold(double conf, int bestAFguess, boolean atTriggerTrack) {
return (atTriggerTrack ?
(conf >= Math.min(UAC.TRIGGER_CONFIDENCE_FOR_CALLING, UAC.TRIGGER_CONFIDENCE_FOR_EMITTING)) :
((UAC.GENOTYPE_MODE || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING)));
protected boolean passesEmitThreshold(double conf, int bestAFguess) {
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
}
protected boolean passesCallThreshold(double conf, boolean atTriggerTrack) {
return (atTriggerTrack ?
(conf >= UAC.TRIGGER_CONFIDENCE_FOR_CALLING) :
(conf >= UAC.STANDARD_CONFIDENCE_FOR_CALLING));
protected boolean passesCallThreshold(double conf) {
return conf >= UAC.STANDARD_CONFIDENCE_FOR_CALLING;
}
protected void computeAlleleFrequencyPriors(int N) {

View File

@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
@ -80,7 +79,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
public void initialize() {
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.ALL_BASES_MODE = true;
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
// print the header

View File

@ -68,7 +68,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
public void initialize() {
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.STANDARD_CONFIDENCE_FOR_CALLING = uac.STANDARD_CONFIDENCE_FOR_EMITTING = confidence;
uac.ALL_BASES_MODE = true;
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
UG = new UnifiedGenotyperEngine(getToolkit(), uac);
out.println("#locus\tid\tdownsampled_coverage\tpct_coverage\titeration\tref\teval_call\tcomp_call\tvariant_concordance\tgenotype_concordance");

View File

@ -120,7 +120,7 @@ public class ValidationGenotyper extends LocusWalker<ValidationGenotyper.Counted
}
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.ALL_BASES_MODE = true;
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
engine = new UnifiedGenotyperEngine(getToolkit(),uac);
logger.info( "Overlapping samples = " + overlappingSamples );

View File

@ -3,8 +3,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// ********************************************************************************** //
@ -18,7 +20,7 @@ public class
// --------------------------------------------------------------------------------------------------------------
//
// testing joint estimation model
// testing normal calling
//
// --------------------------------------------------------------------------------------------------------------
@Test
@ -26,15 +28,35 @@ public class
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("e7514b0f1f2df1ca42815b5c45775f36"));
executeTest("testMultiSamplePilot1", spec);
executeTest("test MultiSample Pilot1", spec);
}
@Test
public void testMultiSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
public void testMultiSamplePilot2AndRecallingWithAlleles() {
String md5 = "49ae129435063f47f0faf00337eb8bf7";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("49ae129435063f47f0faf00337eb8bf7"));
executeTest("testMultiSamplePilot2", spec);
Arrays.asList(md5));
List<File> result = executeTest("test MultiSample Pilot2", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList(md5));
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
}
@Test
public void testWithAllelesPassedIn() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("e95c545b8ae06f0721f260125cfbe1f0"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("6c96d76b9bc3aade0c768d7c657ae210"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
}
@Test
@ -42,7 +64,7 @@ public class
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("8da08fe12bc0d95e548fe63681997038"));
executeTest("testSingleSamplePilot2", spec);
executeTest("test SingleSample Pilot2", spec);
}
// --------------------------------------------------------------------------------------------------------------
@ -58,7 +80,7 @@ public class
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("gz"), Arrays.asList(COMPRESSED_OUTPUT_MD5));
executeTest("testCompressedOutput", spec);
executeTest("test compressed output", spec);
}
// todo -- fixme
@ -103,16 +125,28 @@ public class
// --------------------------------------------------------------------------------------------------------------
@Test
public void testParameter() {
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "4ffcb1e1f20ce175783c32c30deef8db" );
e.put( "-sites_only", "71e561ba6fc66bd8b84907252f71ea55" );
e.put( "-all_bases", "3d98205a31a133c11e518e095dc7ab65" );
e.put( "--min_base_quality_score 26", "5f1cfb9c7f82e6414d5db7aa344813ac" );
e.put( "--min_mapping_quality_score 26", "6c3ad441f3a23ade292549b1dea80932" );
e.put( "--max_mismatches_in_40bp_window 5", "5ecaf4281410b67e8e2e164f2ea0d58a" );
e.put( "--p_nonref_model GRID_SEARCH", "17ffb56d078fdde335a79773e9534ce7" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("test calling parameter[%s]", entry.getKey()), spec);
}
}
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "71e561ba6fc66bd8b84907252f71ea55" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "4ffcb1e1f20ce175783c32c30deef8db" );
e.put( "--output_mode EMIT_ALL_SITES", "3d98205a31a133c11e518e095dc7ab65" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1,
@ -126,12 +160,12 @@ public class
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("17ffb56d078fdde335a79773e9534ce7"));
executeTest("testConfidence1", spec1);
executeTest("test confidence 1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("d49ec8c1476cecb8e3153894cc0f6662"));
executeTest("testConfidence2", spec2);
executeTest("test confidence 2", spec2);
}
// --------------------------------------------------------------------------------------------------------------
@ -149,7 +183,7 @@ public class
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
executeTest(String.format("test heterozyosity[%s]", entry.getKey()), spec);
}
}
@ -168,6 +202,6 @@ public class
1,
Arrays.asList("5974d8c21d27d014e2d0bed695b0b42e"));
executeTest(String.format("testMultiTechnologies"), spec);
executeTest(String.format("test multiple technologies"), spec);
}
}

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.queue.util._
import java.io.File
import org.broadinstitute.sting.datasources.pipeline.Pipeline
import org.broadinstitute.sting.gatk.DownsampleType
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.utils.yaml.YamlUtils
import org.broadinstitute.sting.queue.function.CommandLineFunction
@ -72,8 +73,7 @@ class ProjectManagement(stingPath: String) {
calc.scatterCount = if (bams.size < 5 ) 1 else if (bams.size < 50) 60 else 120
calc.min_base_quality_score = Some(22)
calc.min_mapping_quality_score = Some(20)
calc.genotype = true
calc.output_all_callable_bases = true
//calc.genotyping_mode = Option[GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES]
calc.out = outVCF
calc.rodBind :+= new RodBind("allele","VCF",alleleVCF)
calc.intervals :+= intervals
@ -97,4 +97,4 @@ class ProjectManagement(stingPath: String) {
def swapExt(file: File, oldExtension: String, newExtension: String) =
new File(file.getName.stripSuffix(oldExtension) + newExtension)
}
}