Fixing up the output system of the Unified Genotyper. Deprecating the -all_bases and -genotype arguments. Adding instead the --output_mode (EMIT_VARIANTS_ONLY, EMIT_ALL_CONFIDENT_SITES, EMIT_ALL_SITES) and --genotyping_mode (DISCOVERY, GENOTYPE_GIVEN_ALLELES) arguments. UG now does the correct thing when passed alleles (bound to the 'alleles' rod) to use for genotyping; added several integration tests to cover this case. This commit will break the batched calls merging script, but Chris knows this and is ready for it...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5288 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
63f40215b3
commit
318035c147
|
|
@ -25,8 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
|
|
@ -65,7 +63,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
model = new HaplotypeIndelErrorModel(maxReadDeletionLength, UAC.INSERTION_START_PROBABILITY,
|
||||
UAC.INSERTION_END_PROBABILITY, UAC.ALPHA_DELETION_PROBABILITY, HAPLOTYPE_SIZE, false, DEBUGOUT);
|
||||
alleleList = new ArrayList<Allele>();
|
||||
getAlleleListFromVCF = UAC.GET_ALLELES_FROM_VCF;
|
||||
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
||||
}
|
||||
|
||||
|
|
@ -260,7 +258,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
|
||||
if (getAlleleListFromVCF) {
|
||||
|
||||
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "indels", null, ref.getLocus(), false, false) ) {
|
||||
for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", null, ref.getLocus(), false, false) ) {
|
||||
if( vc_input != null && vc_input.isIndel() && ref.getLocus().getStart() == vc_input.getStart()) {
|
||||
vc = vc_input;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -46,6 +46,11 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
DINDEL
|
||||
}
|
||||
|
||||
public enum GENOTYPING_MODE {
|
||||
DISCOVERY,
|
||||
GENOTYPE_GIVEN_ALLELES
|
||||
}
|
||||
|
||||
protected UnifiedArgumentCollection UAC;
|
||||
protected Logger logger;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
|
|
@ -44,8 +45,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
// the alternate allele with the largest sum of quality scores
|
||||
protected Byte bestAlternateAllele = null;
|
||||
|
||||
private final boolean useAlleleFromVCF;
|
||||
|
||||
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
}
|
||||
|
||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
|
|
@ -63,18 +67,29 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
Allele refAllele = Allele.create(refBase, true);
|
||||
|
||||
// find the alternate allele with the largest sum of quality scores
|
||||
if ( alternateAlleleToUse == null )
|
||||
initializeBestAlternateAllele(refBase, contexts);
|
||||
else
|
||||
if ( alternateAlleleToUse != null ) {
|
||||
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
||||
} else if ( useAlleleFromVCF ) {
|
||||
final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), true);
|
||||
if ( vcInput == null )
|
||||
return null;
|
||||
if ( !vcInput.isBiallelic() ) {
|
||||
logger.info("Record at position " + ref.getLocus() + " is not bi-allelic; skipping...");
|
||||
return null;
|
||||
}
|
||||
if ( !vcInput.isSNP() ) {
|
||||
logger.info("Record at position " + ref.getLocus() + " is not a SNP; skipping...");
|
||||
return null;
|
||||
}
|
||||
bestAlternateAllele = vcInput.getAlternateAllele(0).getBases()[0];
|
||||
} else {
|
||||
initializeBestAlternateAllele(refBase, contexts);
|
||||
}
|
||||
|
||||
// if there are no non-ref bases...
|
||||
if ( bestAlternateAllele == null ) {
|
||||
// did we trigger on the provided track?
|
||||
boolean atTriggerTrack = tracker.getReferenceMetaData(UnifiedGenotyperEngine.TRIGGER_TRACK_NAME, false).size() > 0;
|
||||
|
||||
// if we don't want all bases, then we don't need to calculate genotype likelihoods
|
||||
if ( !atTriggerTrack && !UAC.ALL_BASES_MODE && !UAC.GENOTYPE_MODE )
|
||||
// if we only want variants, then we don't need to calculate genotype likelihoods
|
||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
||||
return refAllele;
|
||||
|
||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||
|
|
|
|||
|
|
@ -83,8 +83,7 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
|
|||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
|
||||
UAC.TRIGGER_CONFIDENCE_FOR_EMITTING < UAC.TRIGGER_CONFIDENCE_FOR_CALLING )
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
|
||||
// initialize the header
|
||||
|
|
|
|||
|
|
@ -44,11 +44,11 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
||||
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
||||
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
||||
public boolean GENOTYPE_MODE = false;
|
||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
||||
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
||||
|
||||
@Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output all callable bases?", required = false)
|
||||
public boolean ALL_BASES_MODE = false;
|
||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
||||
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
|
|
@ -56,12 +56,6 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
|
||||
@Argument(fullName = "trigger_min_confidence_threshold_for_calling", shortName = "trig_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be called", required = false)
|
||||
public double TRIGGER_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
|
||||
@Argument(fullName = "trigger_min_confidence_threshold_for_emitting", shortName = "trig_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
|
||||
public double TRIGGER_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
|
||||
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
|
||||
public boolean NO_SLOD = false;
|
||||
|
||||
|
|
@ -90,9 +84,6 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
|
||||
// indel-related arguments
|
||||
@Argument(fullName = "get_indel_alleles_from_vcf", shortName = "getIndelAllelesFromVCF", doc = "Get reference/alt alleles for indel genotyping from VCF", required = false)
|
||||
public boolean GET_ALLELES_FROM_VCF = false;
|
||||
|
||||
@Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
|
||||
public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
|
||||
|
||||
|
|
@ -101,32 +92,39 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
@Argument(fullName = "insertionStartProbability", shortName = "insertionStartProbability", doc = "Heterozygosity for indel calling", required = false)
|
||||
public double INSERTION_START_PROBABILITY = 1e-3;
|
||||
|
||||
@Argument(fullName = "insertionEndProbability", shortName = "insertionEndProbability", doc = "Heterozygosity for indel calling", required = false)
|
||||
public double INSERTION_END_PROBABILITY = 0.5;
|
||||
|
||||
@Argument(fullName = "alphaDeletionProbability", shortName = "alphaDeletionProbability", doc = "Heterozygosity for indel calling", required = false)
|
||||
public double ALPHA_DELETION_PROBABILITY = 1e-3;
|
||||
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false)
|
||||
private Boolean ALL_BASES_DEPRECATED = false;
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="genotype", shortName="genotype", doc="Please use --output_mode EMIT_ALL_CONFIDENT_SITES instead" ,required=false)
|
||||
private Boolean GENOTYPE_DEPRECATED = false;
|
||||
|
||||
|
||||
public UnifiedArgumentCollection clone() {
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
|
||||
uac.GLmodel = GLmodel;
|
||||
uac.heterozygosity = heterozygosity;
|
||||
uac.PCR_error = PCR_error;
|
||||
uac.GENOTYPE_MODE = GENOTYPE_MODE;
|
||||
uac.ALL_BASES_MODE = ALL_BASES_MODE;
|
||||
uac.GenotypingMode = GenotypingMode;
|
||||
uac.OutputMode = OutputMode;
|
||||
uac.NO_SLOD = NO_SLOD;
|
||||
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
|
||||
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
uac.TRIGGER_CONFIDENCE_FOR_CALLING = TRIGGER_CONFIDENCE_FOR_CALLING;
|
||||
uac.TRIGGER_CONFIDENCE_FOR_EMITTING = TRIGGER_CONFIDENCE_FOR_EMITTING;
|
||||
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
|
||||
uac.MIN_MAPPING_QUALTY_SCORE = MIN_MAPPING_QUALTY_SCORE;
|
||||
uac.MAX_MISMATCHES = MAX_MISMATCHES;
|
||||
uac.USE_BADLY_MATED_READS = USE_BADLY_MATED_READS;
|
||||
uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
|
||||
uac.GET_ALLELES_FROM_VCF = GET_ALLELES_FROM_VCF;
|
||||
uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
|
||||
uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
|
||||
uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY;
|
||||
|
|
|
|||
|
|
@ -160,8 +160,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY));
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
|
||||
UAC.TRIGGER_CONFIDENCE_FOR_EMITTING < UAC.TRIGGER_CONFIDENCE_FOR_CALLING )
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
|
||||
return headerInfo;
|
||||
|
|
|
|||
|
|
@ -59,9 +59,14 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
|
||||
public class UnifiedGenotyperEngine {
|
||||
|
||||
public static final String TRIGGER_TRACK_NAME = "trigger";
|
||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
|
||||
public enum OUTPUT_MODE {
|
||||
EMIT_VARIANTS_ONLY,
|
||||
EMIT_ALL_CONFIDENT_SITES,
|
||||
EMIT_ALL_SITES
|
||||
}
|
||||
|
||||
// the unified argument collection
|
||||
private UnifiedArgumentCollection UAC = null;
|
||||
|
||||
|
|
@ -187,6 +192,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
Map<String, BiallelicGenotypeLikelihoods> GLs = new HashMap<String, BiallelicGenotypeLikelihoods>();
|
||||
|
||||
Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs, alternateAlleleToUse);
|
||||
|
||||
if (refAllele != null)
|
||||
|
|
@ -289,7 +295,7 @@ public class UnifiedGenotyperEngine {
|
|||
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
||||
|
||||
double phredScaledConfidence;
|
||||
if ( bestAFguess != 0 ) {
|
||||
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
|
||||
|
|
@ -306,11 +312,8 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
}
|
||||
|
||||
// did we trigger on the provided track?
|
||||
boolean atTriggerTrack = tracker.getReferenceMetaData(TRIGGER_TRACK_NAME, false).size() > 0;
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||
if ( !UAC.ALL_BASES_MODE && !passesEmitThreshold(phredScaledConfidence, bestAFguess, atTriggerTrack) ) {
|
||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
|
||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||
return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), true, 1.0 - PofF);
|
||||
|
|
@ -380,12 +383,12 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
Set<Allele> myAlleles = vc.getAlleles();
|
||||
// strip out the alternate allele if it's a ref call
|
||||
if ( bestAFguess == 0 ) {
|
||||
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
||||
myAlleles = new HashSet<Allele>(1);
|
||||
myAlleles.add(vc.getReference());
|
||||
}
|
||||
VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc,
|
||||
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes);
|
||||
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
|
||||
|
||||
if ( annotationEngine != null ) {
|
||||
// first off, we want to use the *unfiltered* context for the annotations
|
||||
|
|
@ -400,7 +403,7 @@ public class UnifiedGenotyperEngine {
|
|||
vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element.
|
||||
}
|
||||
|
||||
VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence, atTriggerTrack));
|
||||
VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence));
|
||||
call.setRefBase(refContext.getBase());
|
||||
return call;
|
||||
}
|
||||
|
|
@ -440,7 +443,7 @@ public class UnifiedGenotyperEngine {
|
|||
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
// don't call when there is no coverage
|
||||
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
||||
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
|
|
@ -561,7 +564,7 @@ public class UnifiedGenotyperEngine {
|
|||
// now, test for bad pileups
|
||||
|
||||
// in all_bases mode, it doesn't matter
|
||||
if ( UAC.ALL_BASES_MODE )
|
||||
if ( UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES )
|
||||
return true;
|
||||
|
||||
// is there no coverage?
|
||||
|
|
@ -620,16 +623,12 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
}
|
||||
|
||||
protected boolean passesEmitThreshold(double conf, int bestAFguess, boolean atTriggerTrack) {
|
||||
return (atTriggerTrack ?
|
||||
(conf >= Math.min(UAC.TRIGGER_CONFIDENCE_FOR_CALLING, UAC.TRIGGER_CONFIDENCE_FOR_EMITTING)) :
|
||||
((UAC.GENOTYPE_MODE || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING)));
|
||||
protected boolean passesEmitThreshold(double conf, int bestAFguess) {
|
||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||
}
|
||||
|
||||
protected boolean passesCallThreshold(double conf, boolean atTriggerTrack) {
|
||||
return (atTriggerTrack ?
|
||||
(conf >= UAC.TRIGGER_CONFIDENCE_FOR_CALLING) :
|
||||
(conf >= UAC.STANDARD_CONFIDENCE_FOR_CALLING));
|
||||
protected boolean passesCallThreshold(double conf) {
|
||||
return conf >= UAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
}
|
||||
|
||||
protected void computeAlleleFrequencyPriors(int N) {
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.walkers.By;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.BaseMismatchModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
|
|
@ -80,7 +79,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
|
|||
|
||||
public void initialize() {
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
uac.ALL_BASES_MODE = true;
|
||||
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||
|
||||
// print the header
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
|
|||
public void initialize() {
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
uac.STANDARD_CONFIDENCE_FOR_CALLING = uac.STANDARD_CONFIDENCE_FOR_EMITTING = confidence;
|
||||
uac.ALL_BASES_MODE = true;
|
||||
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||
UG = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||
|
||||
out.println("#locus\tid\tdownsampled_coverage\tpct_coverage\titeration\tref\teval_call\tcomp_call\tvariant_concordance\tgenotype_concordance");
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ public class ValidationGenotyper extends LocusWalker<ValidationGenotyper.Counted
|
|||
}
|
||||
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
uac.ALL_BASES_MODE = true;
|
||||
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||
engine = new UnifiedGenotyperEngine(getToolkit(),uac);
|
||||
|
||||
logger.info( "Overlapping samples = " + overlappingSamples );
|
||||
|
|
|
|||
|
|
@ -3,8 +3,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// ********************************************************************************** //
|
||||
|
|
@ -18,7 +20,7 @@ public class
|
|||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing joint estimation model
|
||||
// testing normal calling
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
|
|
@ -26,15 +28,35 @@ public class
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("e7514b0f1f2df1ca42815b5c45775f36"));
|
||||
executeTest("testMultiSamplePilot1", spec);
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
public void testMultiSamplePilot2AndRecallingWithAlleles() {
|
||||
String md5 = "49ae129435063f47f0faf00337eb8bf7";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("49ae129435063f47f0faf00337eb8bf7"));
|
||||
executeTest("testMultiSamplePilot2", spec);
|
||||
Arrays.asList(md5));
|
||||
List<File> result = executeTest("test MultiSample Pilot2", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithAllelesPassedIn() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("e95c545b8ae06f0721f260125cfbe1f0"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("6c96d76b9bc3aade0c768d7c657ae210"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -42,7 +64,7 @@ public class
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("8da08fe12bc0d95e548fe63681997038"));
|
||||
executeTest("testSingleSamplePilot2", spec);
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -58,7 +80,7 @@ public class
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("gz"), Arrays.asList(COMPRESSED_OUTPUT_MD5));
|
||||
executeTest("testCompressedOutput", spec);
|
||||
executeTest("test compressed output", spec);
|
||||
}
|
||||
|
||||
// todo -- fixme
|
||||
|
|
@ -103,16 +125,28 @@ public class
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testParameter() {
|
||||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-genotype", "4ffcb1e1f20ce175783c32c30deef8db" );
|
||||
e.put( "-sites_only", "71e561ba6fc66bd8b84907252f71ea55" );
|
||||
e.put( "-all_bases", "3d98205a31a133c11e518e095dc7ab65" );
|
||||
e.put( "--min_base_quality_score 26", "5f1cfb9c7f82e6414d5db7aa344813ac" );
|
||||
e.put( "--min_mapping_quality_score 26", "6c3ad441f3a23ade292549b1dea80932" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "5ecaf4281410b67e8e2e164f2ea0d58a" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "17ffb56d078fdde335a79773e9534ce7" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1,
|
||||
Arrays.asList(entry.getValue()));
|
||||
executeTest(String.format("test calling parameter[%s]", entry.getKey()), spec);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "71e561ba6fc66bd8b84907252f71ea55" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "4ffcb1e1f20ce175783c32c30deef8db" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "3d98205a31a133c11e518e095dc7ab65" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1,
|
||||
|
|
@ -126,12 +160,12 @@ public class
|
|||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("17ffb56d078fdde335a79773e9534ce7"));
|
||||
executeTest("testConfidence1", spec1);
|
||||
executeTest("test confidence 1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("d49ec8c1476cecb8e3153894cc0f6662"));
|
||||
executeTest("testConfidence2", spec2);
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -149,7 +183,7 @@ public class
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1,
|
||||
Arrays.asList(entry.getValue()));
|
||||
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
|
||||
executeTest(String.format("test heterozyosity[%s]", entry.getKey()), spec);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -168,6 +202,6 @@ public class
|
|||
1,
|
||||
Arrays.asList("5974d8c21d27d014e2d0bed695b0b42e"));
|
||||
|
||||
executeTest(String.format("testMultiTechnologies"), spec);
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.queue.util._
|
|||
import java.io.File
|
||||
import org.broadinstitute.sting.datasources.pipeline.Pipeline
|
||||
import org.broadinstitute.sting.gatk.DownsampleType
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||
import org.broadinstitute.sting.queue.function.CommandLineFunction
|
||||
|
|
@ -72,8 +73,7 @@ class ProjectManagement(stingPath: String) {
|
|||
calc.scatterCount = if (bams.size < 5 ) 1 else if (bams.size < 50) 60 else 120
|
||||
calc.min_base_quality_score = Some(22)
|
||||
calc.min_mapping_quality_score = Some(20)
|
||||
calc.genotype = true
|
||||
calc.output_all_callable_bases = true
|
||||
//calc.genotyping_mode = Option[GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES]
|
||||
calc.out = outVCF
|
||||
calc.rodBind :+= new RodBind("allele","VCF",alleleVCF)
|
||||
calc.intervals :+= intervals
|
||||
|
|
@ -97,4 +97,4 @@ class ProjectManagement(stingPath: String) {
|
|||
def swapExt(file: File, oldExtension: String, newExtension: String) =
|
||||
new File(file.getName.stripSuffix(oldExtension) + newExtension)
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue