Generalize the calculation of the genotype likelihoods in HC to cope with haploid and multi-ploid samples

Changes in several walkers to use the new sample and allele closed lists and the new GenotypingEngine constructor signatures

Rebase adoption of new calculation system in walkers
This commit is contained in:
Valentin Ruano-Rubio 2014-08-18 15:29:28 -04:00
parent f08dcbc160
commit 9ee9da36bb
37 changed files with 712 additions and 399 deletions

View File

@ -145,7 +145,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
*/
@Hidden
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
public AFCalcFactory.Calculation requestedAlleleFrequencyCalculationModel;
@Hidden
@Argument(shortName = "logExactCalls", doc="x", required=false)

View File

@ -68,6 +68,28 @@ import java.util.List;
*/
public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
private final int cachePloidyCapacity;
private final int cacheAlleleCountCapacity;
private ThreadLocal<GenotypeLikelihoodCalculator[][]> likelihoodCalculators;
/**
 * Create a new infinite model instance.
 *
 * <p>Uses the default per-thread calculator cache capacities: ploidy capacity 10
 * and allele-count capacity 50 (see the two-argument constructor).</p>
 */
public InfiniteRandomMatingPopulationModel() {
this(10,50);
}
/**
 * Create a new infinite model instance with custom per-thread calculator cache capacities.
 *
 * @param calculatorCachePloidyCapacity largest ploidy (exclusive) served from the cache.
 * @param calculatorCacheAlleleCapacity largest allele count (exclusive) served from the cache.
 */
public InfiniteRandomMatingPopulationModel(final int calculatorCachePloidyCapacity, final int calculatorCacheAlleleCapacity) {
    cachePloidyCapacity = calculatorCachePloidyCapacity;
    // BUG FIX: this field was previously assigned from calculatorCachePloidyCapacity,
    // silently ignoring the allele-capacity argument.
    cacheAlleleCountCapacity = calculatorCacheAlleleCapacity;
    likelihoodCalculators = new ThreadLocal<GenotypeLikelihoodCalculator[][]>() {
        @Override
        public GenotypeLikelihoodCalculator[][] initialValue() {
            // Lazily created, one cache matrix per thread; entries filled on demand.
            return new GenotypeLikelihoodCalculator[calculatorCachePloidyCapacity][calculatorCacheAlleleCapacity];
        }
    };
}
@Override
public <A extends Allele> GenotypingLikelihoods<A> calculateLikelihoods(final AlleleList<A> genotypingAlleles, final GenotypingData<A> data) {
if (genotypingAlleles == null)
@ -99,18 +121,27 @@ public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
}
private <A extends Allele> GenotypingLikelihoods<A> singleSampleLikelihoods(final AlleleList<A> genotypingAlleles,
final GenotypingData<A> data,
final AlleleLikelihoodMatrixMapper<A> alleleLikelihoodMatrixMapper) {
final GenotypingData<A> data,
final AlleleLikelihoodMatrixMapper<A> alleleLikelihoodMatrixMapper) {
final PloidyModel ploidyModel = data.ploidyModel();
final int samplePloidy = ploidyModel.samplePloidy(0);
final int alleleCount = genotypingAlleles.alleleCount();
final GenotypeLikelihoodCalculator likelihoodsCalculator =
GenotypeLikelihoodCalculator.getInstance(samplePloidy,alleleCount);
final GenotypeLikelihoodCalculator likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy,alleleCount);
final ReadLikelihoods.Matrix<A> sampleLikelihoods = alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(0));
final List<GenotypeLikelihoods> genotypeLikelihoods = Collections.singletonList(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods));
return new GenotypingLikelihoods<>(genotypingAlleles,ploidyModel,genotypeLikelihoods);
}
/**
 * Returns a genotype-likelihood calculator for the requested ploidy and allele count,
 * serving it from the per-thread cache when both dimensions fit within its capacity.
 *
 * @param samplePloidy the sample ploidy.
 * @param alleleCount the number of genotyping alleles.
 * @return never {@code null}.
 */
private GenotypeLikelihoodCalculator getLikelihoodsCalculator(final int samplePloidy, final int alleleCount) {
    // BUG FIX: the ploidy was compared against the allele-count capacity
    // (cacheAlleleCountCapacity), which would index past the cache's first dimension
    // (ArrayIndexOutOfBoundsException) whenever the two capacities differ.
    // Also merges the two duplicated fallback branches into one condition.
    if (samplePloidy >= cachePloidyCapacity || alleleCount >= cacheAlleleCountCapacity)
        return GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount);
    final GenotypeLikelihoodCalculator[][] cache = likelihoodCalculators.get();
    final GenotypeLikelihoodCalculator result = cache[samplePloidy][alleleCount];
    // Populate the cache slot on first use; subsequent calls on this thread reuse it.
    return result != null ? result : (cache[samplePloidy][alleleCount] = GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount));
}
private <A extends Allele> GenotypingLikelihoods<A> multiSampleHeterogeneousPloidyModelLikelihoods(final AlleleList<A> genotypingAlleles,
final GenotypingData<A> data,
final AlleleLikelihoodMatrixMapper<A> alleleLikelihoodMatrixMapper,
@ -120,8 +151,7 @@ public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
final int alleleCount = genotypingAlleles.alleleCount();
for (int i = 0; i < sampleCount; i++) {
final int samplePloidy = ploidyModel.samplePloidy(i);
final GenotypeLikelihoodCalculator likelihoodsCalculator =
GenotypeLikelihoodCalculator.getInstance(samplePloidy,alleleCount);
final GenotypeLikelihoodCalculator likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy,alleleCount);
final ReadLikelihoods.Matrix<A> sampleLikelihoods = alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(i));
genotypeLikelihoods.add(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods));
}
@ -136,8 +166,7 @@ public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
final int samplePloidy = ploidyModel.samplePloidy(0);
final List<GenotypeLikelihoods> genotypeLikelihoods = new ArrayList<>(sampleCount);
final int alleleCount = genotypingAlleles.alleleCount();
final GenotypeLikelihoodCalculator likelihoodsCalculator =
GenotypeLikelihoodCalculator.getInstance(samplePloidy,alleleCount);
final GenotypeLikelihoodCalculator likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy,alleleCount);
for (int i = 0; i < sampleCount; i++) {
final ReadLikelihoods.Matrix<A> sampleLikelihoods = alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(i));
genotypeLikelihoods.add(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods));

View File

@ -47,16 +47,19 @@
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.samtools.SAMUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.genotyping.GenotypeAlleleCounts;
import org.broadinstitute.gatk.genotyping.GenotypeLikelihoodCalculator;
import org.broadinstitute.gatk.genotyping.GenotypeLikelihoodCalculators;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACcounts;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACset;
import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import java.util.*;
@ -424,18 +427,9 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
*/
public static int[] getAlleleCountFromPLIndex(final int nAlleles, final int numChromosomes, final int PLindex) {
// todo - another brain-dead inefficient implementation, can do much better by computing in closed form
final SumIterator iterator = new SumIterator(nAlleles,numChromosomes);
while (iterator.hasNext()) {
final int[] plVec = iterator.getCurrentVector();
if (iterator.getLinearIndex() == PLindex)
return plVec;
iterator.next();
}
return null;
final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(numChromosomes, nAlleles);
final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(PLindex);
return alleleCounts.alleleCountsByIndex(nAlleles - 1);
}
/*

View File

@ -48,15 +48,17 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
@ -69,8 +71,6 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import java.util.*;
@ -86,22 +86,20 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected Logger logger;
protected final GenomeAnalysisEngine toolkit;
protected final Config configuration;
protected VariantAnnotatorEngine annotationEngine;
protected final int numberOfGenomes;
protected final Collection<String> sampleNames;
protected final SampleList samples;
private final double[] log10AlleleFrequencyPriorsSNPs;
private final double[] log10AlleleFrequencyPriorsIndels;
private final GenomeLocParser genomeLocParser;
protected final GenomeLocParser genomeLocParser;
// the model used for calculating p(non-ref)
protected ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>() {
@ -111,59 +109,32 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, logger);
}
};
/**
* Construct a new genotyper engine.
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
*/
protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration) {
this(toolkit,configuration,resolveSampleNamesFromToolkit(toolkit));
}
/**
 * Resolves the sample-name set as the set of all samples passed to the tool.
 *
 * @param toolkit reference to the toolkit; must not be {@code null}.
 *
 * @throws IllegalArgumentException if {@code toolkit} is {@code null}.
 *
 * @return never {@code null}; empty when the toolkit carries no samples.
 */
private static Set<String> resolveSampleNamesFromToolkit(final GenomeAnalysisEngine toolkit) {
    if (toolkit != null) {
        // Preserve the toolkit's sample ordering while de-duplicating.
        return new LinkedHashSet<>(toolkit.getSampleDB().getSampleNames());
    }
    throw new IllegalArgumentException("the toolkit cannot be null");
}
/**
* Construct a new genotyper engine, on a specific subset of samples.
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
* @param sampleNames subset of sample to work on identified by their names. If {@code null}, the full toolkit
* @param samples subset of sample to work on identified by their names. If {@code null}, the full toolkit
* sample set will be used instead.
* @param genomeLocParser the genome-loc-parser
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
* @throws IllegalArgumentException if any of {@code samples}, {@code configuration} or {@code genomeLocParser} is {@code null}.
*/
protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration,final Set<String> sampleNames) {
if (toolkit == null)
throw new IllegalArgumentException("the toolkit cannot be null");
protected GenotypingEngine(final Config configuration,final SampleList samples, final GenomeLocParser genomeLocParser) {
if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null");
this.configuration = configuration;
logger = Logger.getLogger(getClass());
this.toolkit = toolkit;
this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames();
numberOfGenomes = this.sampleNames.size() * configuration.genotypeArgs.samplePloidy;
this.samples = samples;
numberOfGenomes = this.samples.sampleCount() * configuration.genotypeArgs.samplePloidy;
MathUtils.Log10Cache.ensureCacheContains(numberOfGenomes * 2);
log10AlleleFrequencyPriorsSNPs = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.snpHeterozygosity,configuration.genotypeArgs.inputPrior);
log10AlleleFrequencyPriorsIndels = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.indelHeterozygosity,configuration.genotypeArgs.inputPrior);
genomeLocParser = toolkit.getGenomeLocParser();
this.genomeLocParser = genomeLocParser;
}
/**
@ -257,7 +228,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
! outputAlternativeAlleles.siteIsMonomorphic ||
configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs
? AFresult.getLog10PosteriorOfAFEq0() + 0.0
: AFresult.getLog10PosteriorOfAFGT0() + 0.0;
: AFresult.getLog10PosteriorOfAFGT0() + 0.0 ;
// Add 0.0 removes -0.0 occurrences.
@ -270,6 +241,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getModelTheta(model), true, PoFGT0);
// start constructing the resulting VC
final GenomeLocParser genomeLocParser = this.genomeLocParser != null || refContext == null ? this.genomeLocParser : refContext.getGenomeLocParser();
if (genomeLocParser == null)
throw new IllegalStateException("this UG engine was created without a valid genomeLocParser and no refContext was provided");
final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), loc.getContig(), loc.getStart(), loc.getStop(), outputAlleles);
@ -506,7 +480,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
double log10POfRef = Math.log10(initialPofRef);
// for each sample that we haven't examined yet
for ( String sample : sampleNames ) {
final int sampleCount = samples.sampleCount();
for (int i = 0; i < sampleCount; i++) {
final String sample = samples.sampleAt(i);
final AlignmentContext context = contexts.get(sample);
if ( ignoreCoveredSamples && context != null )
continue;

View File

@ -46,7 +46,10 @@
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
@ -199,14 +202,14 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
* Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
*/
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<String>();
protected List<String> annotationsToUse = new ArrayList<>();
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<String>();
protected List<String> annotationsToExclude = new ArrayList<>();
/**
* If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups.
@ -218,11 +221,6 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// the calculation arguments
private UnifiedGenotypingEngine genotypingEngine = null;
// the annotation engine
private VariantAnnotatorEngine annotationEngine;
private Set<String> samples;
// enable deletions in the pileup
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
@ -256,17 +254,22 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
*
**/
public void initialize() {
super.initialize();
final GenomeAnalysisEngine toolkit = getToolkit();
final Set<String> sampleNameSet;
if ( UAC.TREAT_ALL_READS_AS_SINGLE_POOL ) {
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
sampleNameSet = Collections.singleton(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
} else {
// get all of the unique sample names
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
sampleNameSet = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
if ( UAC.referenceSampleName != null )
samples.remove(UAC.referenceSampleName);
sampleNameSet.remove(UAC.referenceSampleName);
}
final SampleList samples = new IndexedSampleList(sampleNameSet);
if ( UAC.CONTAMINATION_FRACTION_FILE != null )
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, sampleNameSet, logger));
// check for a bad max alleles value
if ( UAC.genotypeArgs.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)
@ -282,8 +285,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine);
@ -298,11 +301,11 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
final Set<String> samplesForHeader;
if ( ! onlyEmitSamples.isEmpty() ) {
// make sure that onlyEmitSamples is a subset of samples
if ( ! samples.containsAll(onlyEmitSamples) )
if ( ! sampleNameSet.containsAll(onlyEmitSamples) )
throw new UserException.BadArgumentValue("onlyEmitSamples", "must be a strict subset of the samples in the BAM files but is wasn't");
samplesForHeader = onlyEmitSamples;
} else {
samplesForHeader = samples;
samplesForHeader = sampleNameSet;
}
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
}
@ -310,7 +313,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
final VariantAnnotatorEngine annotationEngine,
final DbsnpArgumentCollection dbsnp) {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
final Set<VCFHeaderLine> headerInfo = new HashSet<>();
// all annotation fields from VariantAnnotatorEngine
if ( annotationEngine != null )

View File

@ -45,12 +45,16 @@
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.GenomeLocParser;
@ -62,9 +66,6 @@ import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
@ -88,7 +89,6 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
// the various loggers and writers
private PrintStream verboseWriter;
private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine;
// ---------------------------------------------------------------------------------------------------------
@ -97,36 +97,42 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
//
// ---------------------------------------------------------------------------------------------------------
/**
* Constructs a new Unified-Genotyper engine.
* <p>The new engine won't emmit annotations, will use the full sample set and will not produce additional verbose
* output</p>
* Creates a new unified genotyping given the UG configuration parameters and the GA engine.
* @param configuration the UG configuration.
* @param toolkit the GA engine.
*
* @param toolkit reference to the enclosing genome analysis engine.
* @param configuration configuration object.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
* @throws NullPointerException if either {@code configuration} or {@code toolkit} is {@code null}.
*/
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration) {
this(toolkit, configuration, null);
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
final GenomeAnalysisEngine toolkit) {
this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
}
/**
* Constructs a new Unified-Genotyper engine.
* Creates a new unified genotyping given the UG configuration parameters, the targeted set of samples and
* a genome location parser.
*
* @param toolkit reference to the enclosing genome analysis engine.
* @param configuration configuration object.
* @param sampleNames subset of sample names to work on. If {@code null}, all it will use the {@code toolkit} full sample set.
* @param configuration the UG configuration.
* @param samples {@inheritDoc}
* @param baqCalculationMode the BAQ calculation mode.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
* @throws NullPointerException if any of {@code configuration}, {@code samples} or {@code genomeLocParser} is {@code null}.
*
* @throws IllegalArgumentException if {@code baqCalculationMode} is {@code null}.
*/
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration,
final Set<String> sampleNames) {
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
final SampleList samples, final GenomeLocParser genomeLocParser,
final BAQ.CalculationMode baqCalculationMode) {
super(toolkit,configuration,sampleNames);
super(configuration,samples,genomeLocParser);
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
if (baqCalculationMode == null)
throw new IllegalArgumentException("the BAQ calculation mode cannot be null");
this.BAQEnabledOnCMDLine = baqCalculationMode != BAQ.CalculationMode.OFF;
determineGLModelsToUse();
@ -302,7 +308,8 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String, org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine,
genomeLocParser != null || refContext == null ? genomeLocParser : refContext.getGenomeLocParser(), perReadAlleleLikelihoodMap);
}

View File

@ -48,10 +48,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.lang.reflect.Constructor;
import java.util.LinkedList;
@ -105,6 +103,24 @@ public class AFCalcFactory {
}
public static Calculation getDefaultModel() { return EXACT_INDEPENDENT; }
/**
 * Returns the best (fastest) model given the required ploidy and alternative allele count.
 * @param requiredPloidy required ploidy
 * @param requiredAlternativeAlleleCount required alternative allele count.
 * @param preferredModel a preferred model, if any. A {@code null} indicates that the default model should be tried instead.
 * @return never {@code null}
 */
public static Calculation getBestModel(final int requiredPloidy, final int requiredAlternativeAlleleCount, final Calculation preferredModel) {
final Calculation preferred = preferredModel == null ? getDefaultModel() : preferredModel;
// Honor the caller's preference (or the default) whenever it can handle the parameters.
if (preferred.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return preferred;
// Otherwise fall through the models from fastest to most general.
if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_INDEPENDENT;
if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_REFERENCE;
// Last resort: the general-ploidy model, assumed to cover any parameters -- TODO confirm.
return EXACT_GENERAL_PLOIDY;
}
}
private static final Map<String, Class<? extends AFCalc>> afClasses;
@ -137,25 +153,10 @@ public class AFCalcFactory {
public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC,
final int nSamples,
final Logger logger) {
final int maxAltAlleles = UAC.genotypeArgs.MAX_ALTERNATE_ALLELES;
if ( ! UAC.AFmodel.usableForParams(UAC.genotypeArgs.samplePloidy, maxAltAlleles) ) {
logger.info("Requested ploidy " + UAC.genotypeArgs.samplePloidy + " maxAltAlleles " + maxAltAlleles + " not supported by requested model " + UAC.AFmodel + " looking for an option");
final List<Calculation> supportingCalculations = new LinkedList<Calculation>();
for ( final Calculation calc : Calculation.values() ) {
if ( calc.usableForParams(UAC.genotypeArgs.samplePloidy, maxAltAlleles) )
supportingCalculations.add(calc);
}
final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES,
UAC.requestedAlleleFrequencyCalculationModel);
if ( supportingCalculations.isEmpty() )
throw new UserException("no AFCalculation model found that supports ploidy of " + UAC.genotypeArgs.samplePloidy + " and max alt alleles " + maxAltAlleles);
else if ( supportingCalculations.size() > 1 )
logger.debug("Warning, multiple supporting AFCalcs found " + Utils.join(",", supportingCalculations) + " choosing first arbitrarily");
else
UAC.AFmodel = supportingCalculations.get(0);
logger.info("Selecting model " + UAC.AFmodel);
}
final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, maxAltAlleles, UAC.genotypeArgs.samplePloidy);
final AFCalc calc = createAFCalc(afCalculationModel, nSamples, UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, UAC.genotypeArgs.samplePloidy);
if ( logger != null ) calc.setLogger(logger);
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);

View File

@ -56,7 +56,7 @@ import htsjdk.variant.variantcontext.*;
import java.util.*;
public class GeneralPloidyExactAFCalc extends ExactAFCalc {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
private final int ploidy;

View File

@ -47,6 +47,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.SeqGraph;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
@ -113,7 +114,7 @@ public class GraphBasedLikelihoodCalculationEngine implements ReadLikelihoodCalc
}
@Override
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final AssemblyResultSet assemblyResultSet, final List<String> samples, final Map<String, List<GATKSAMRecord>> perSampleReadList) {
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final AssemblyResultSet assemblyResultSet, final SampleList samples, final Map<String, List<GATKSAMRecord>> perSampleReadList) {
final GraphBasedLikelihoodCalculationEngineInstance graphLikelihoodEngine =
new GraphBasedLikelihoodCalculationEngineInstance(assemblyResultSet,
hmm,log10GlobalReadMismappingRate,heterogeneousKmerSizeResolution);

View File

@ -46,7 +46,11 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.genotyping.AlleleList;
import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.MultiSampleEdge;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Path;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Route;
@ -61,7 +65,6 @@ import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pairhmm.FlexibleHMM;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import java.util.*;
@ -233,12 +236,13 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
* @return never {@code null}, and with at least one entry for input sample (keys in {@code perSampleReadList}.
* The value maps can be potentially empty though.
*/
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final List<Haplotype> haplotypes, final List<String> samples,
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final List<Haplotype> haplotypes, final SampleList samples,
final Map<String, List<GATKSAMRecord>> perSampleReadList) {
// General preparation on the input haplotypes:
final ReadLikelihoods<Haplotype> result = new ReadLikelihoods<>(samples, haplotypes, perSampleReadList);
final List<Haplotype> sortedHaplotypes = new ArrayList<>(haplotypes);
Collections.sort(sortedHaplotypes, Haplotype.ALPHANUMERICAL_COMPARATOR);
final AlleleList<Haplotype> alleles = new IndexedAlleleList<>(sortedHaplotypes);
final ReadLikelihoods<Haplotype> result = new ReadLikelihoods<>(samples, alleles, perSampleReadList);
// The actual work:
final int sampleCount = result.sampleCount();
@ -315,7 +319,7 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
private void calculatePerReadAlleleLikelihoodMapHaplotypeProcessing(final int haplotypeIndex,
final ReadLikelihoods.Matrix<Haplotype> likelihoods,
final Map<MultiDeBruijnVertex, Set<ReadSegmentCost>> costsEndingByVertex) {
final Haplotype haplotype = likelihoods.allele(haplotypeIndex);
final Haplotype haplotype = likelihoods.alleleAt(haplotypeIndex);
final HaplotypeRoute haplotypeRoute = haplotypeGraph.getHaplotypeRoute(haplotype);
final Set<MultiDeBruijnVertex> haplotypeVertices = haplotypeRoute.vertexSet();
final Map<GATKSAMRecord, ReadCost> readCostByRead = new HashMap<>();

View File

@ -52,6 +52,7 @@ import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
@ -64,15 +65,12 @@ import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
@ -606,7 +604,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// the minimum length of a read we'd consider using for genotyping
private final static int MIN_READ_LENGTH = 10;
private List<String> samplesList;
private SampleList samplesList;
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
@ -626,9 +624,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
public void initialize() {
super.initialize();
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY)
throw new UserException.BadArgumentValue("-ploidy", "" + SCAC.genotypeArgs.samplePloidy + "; currently HaplotypeCaller only supports diploid sample analysis (-ploidy 2)");
if (dontGenotype && emitReferenceConfidence())
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
@ -656,12 +651,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
logger.info("Disabling physical phasing, which is supported only for reference-model confidence output");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
throw new UserException.BadArgumentValue("pnrm", "HaplotypeCaller doesn't currently support " + SCAC.AFmodel);
samplesList = new ArrayList<>(SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()));
Set<String> samplesSet = new LinkedHashSet<>(samplesList);
final GenomeAnalysisEngine toolkit = getToolkit();
samplesList = toolkit.getReadSampleList();
final Set<String> sampleSet = SampleListUtils.asSet(samplesList);
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
@ -672,20 +664,24 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.CONTAMINATION_FRACTION_FILE = null;
simpleUAC.exactCallsLog = null;
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(getToolkit(), simpleUAC, samplesSet);
// Seems that at least with some test data we can lose genuine haploid variation if we use
// UGs engine with ploidy == 1
simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit);
activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null )
SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, samplesSet, logger));
SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, sampleSet, logger));
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, !doNotRunPhysicalPhasing);
final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
Set<VCFHeaderLine> headerInfo = new HashSet<>();
final Set<VCFHeaderLine> headerInfo = new HashSet<>();
headerInfo.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
// all annotation fields from VariantAnnotatorEngine
@ -707,13 +703,20 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
}
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY) {
if (SCAC.emitReferenceConfidence != ReferenceConfidenceMode.NONE)
throw new UserException.BadArgumentValue("ERC", "For now ploidies different that 2 are not allow for GVCF or BP_RESOLUTION outputs");
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
}
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotypingEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
initializeReferenceConfidenceModel(samplesSet, headerInfo);
initializeReferenceConfidenceModel(samplesList, headerInfo);
vcfWriter.writeHeader(new VCFHeader(headerInfo, samplesSet));
vcfWriter.writeHeader(new VCFHeader(headerInfo, sampleSet));
try {
// fasta reference reader to supplement the edges of the reference sequence
@ -771,10 +774,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence());
}
private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) {
private void initializeReferenceConfidenceModel(final SampleList samples, final Set<VCFHeaderLine> headerInfo) {
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
if ( emitReferenceConfidence() ) {
if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
if ( samples.sampleCount() != 1 )
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
// a kluge to enforce the use of this indexing strategy
@ -784,7 +788,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
}
try {
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands,SCAC.genotypeArgs.samplePloidy);
} catch ( IllegalArgumentException e ) {
throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
}
@ -857,8 +861,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
final GenotypingModel genotypingModel = genotypingEngine.getGenotypingModel();
for( final Map.Entry<String, AlignmentContext> sample : splitContexts.entrySet() ) {
final double[] genotypeLikelihoods = referenceConfidenceModel.calcGenotypeLikelihoodsOfRefVsAny(sample.getValue().getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, averageHQSoftClips).genotypeLikelihoods;
final String sampleName = sample.getKey();
// The ploidy here is not dictated by the sample but by the simple genotyping-engine used to determine whether regions are active or not.
final int activeRegionDetectionHackishSamplePloidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy;
final double[] genotypeLikelihoods = referenceConfidenceModel.calcGenotypeLikelihoodsOfRefVsAny(sampleName,activeRegionDetectionHackishSamplePloidy,genotypingModel,sample.getValue().getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, averageHQSoftClips).genotypeLikelihoods;
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
@ -969,8 +977,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(),
metaDataTracker,
( consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles ),
emitReferenceConfidence() );
(consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles),
emitReferenceConfidence());
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) {
@ -997,7 +1005,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// output variant containing region.
result.addAll(referenceConfidenceModel.calculateRefConfidence(assemblyResult.getReferenceHaplotype(),
calledHaplotypes.getCalledHaplotypes(), assemblyResult.getPaddedReferenceLoc(), regionForGenotyping,
readLikelihoods, calledHaplotypes.getCalls()));
readLikelihoods, genotypingEngine.getPloidyModel(), genotypingEngine.getGenotypingModel(), calledHaplotypes.getCalls()));
// output right-flanking non-variant section:
if (trimmingResult.hasRightFlankingRegion())
result.addAll(referenceModelForNoVariation(trimmingResult.nonVariantRightFlankRegion(),false));
@ -1110,7 +1118,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final List<Haplotype> haplotypes = Collections.singletonList(refHaplotype);
return referenceConfidenceModel.calculateRefConfidence(refHaplotype, haplotypes,
paddedLoc, region, createDummyStratifiedReadMap(refHaplotype, samplesList, region),
Collections.<VariantContext>emptyList());
genotypingEngine.getPloidyModel(), genotypingEngine.getGenotypingModel(), Collections.<VariantContext>emptyList());
} else
return NO_CALLS;
}
@ -1123,11 +1131,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
* @return a map from sample -> PerReadAlleleLikelihoodMap that maps each read to ref
*/
public static ReadLikelihoods<Haplotype> createDummyStratifiedReadMap(final Haplotype refHaplotype,
final List<String> samples,
final SampleList samples,
final ActiveRegion region) {
return new ReadLikelihoods<>(samples, Collections.singletonList(refHaplotype),
return new ReadLikelihoods<>(samples, new IndexedAlleleList<>(refHaplotype),
splitReadsBySample(samples, region.getReads()));
}
@ -1235,18 +1242,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
return splitReadsBySample(samplesList, reads);
}
public static Map<String, List<GATKSAMRecord>> splitReadsBySample( final List<String> samplesList, final Collection<GATKSAMRecord> reads ) {
private static Map<String, List<GATKSAMRecord>> splitReadsBySample( final SampleList samplesList, final Collection<GATKSAMRecord> reads ) {
final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<>();
for( final String sample : samplesList) {
List<GATKSAMRecord> readList = returnMap.get( sample );
if( readList == null ) {
readList = new ArrayList<>();
returnMap.put(sample, readList);
}
}
for( final GATKSAMRecord read : reads ) {
final int sampleCount = samplesList.sampleCount();
for (int i = 0; i < sampleCount; i++)
returnMap.put(samplesList.sampleAt(i), new ArrayList<GATKSAMRecord>());
for( final GATKSAMRecord read : reads )
returnMap.get(read.getReadGroup().getSample()).add(read);
}
return returnMap;
}

View File

@ -45,9 +45,9 @@
*/
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
/**
* Set of arguments for the {@link HaplotypeCaller}

View File

@ -48,8 +48,9 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode;
@ -64,7 +65,6 @@ import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.haplotype.MergeVariantsAcrossHaplotypes;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import java.util.*;
@ -80,28 +80,33 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
private final boolean doPhysicalPhasing;
private final GenotypingModel genotypingModel;
private final PloidyModel ploidyModel;
/**
* {@inheritDoc}
* @param toolkit {@inheritDoc}
* @param configuration {@inheritDoc}
* @param samples {@inheritDoc}
* @param genomeLocParser {@inheritDoc}
* @param doPhysicalPhasing whether to try physical phasing.
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean doPhysicalPhasing) {
super(toolkit,configuration);
this.doPhysicalPhasing = doPhysicalPhasing;
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final boolean doPhysicalPhasing) {
super(configuration,samples,genomeLocParser);
if (genomeLocParser == null)
throw new IllegalArgumentException("the genome location parser provided cannot be null");
this.doPhysicalPhasing= doPhysicalPhasing;
ploidyModel = new HomogeneousPloidyModel(samples,configuration.genotypeArgs.samplePloidy);
genotypingModel = new InfiniteRandomMatingPopulationModel();
}
/**
* {@inheritDoc}
* @param toolkit {@inheritDoc}
* @param configuration {@inheritDoc}
* @param sampleNames {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) {
super(toolkit,configuration,sampleNames);
doPhysicalPhasing = true;
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser) {
this(configuration,samples,genomeLocParser,false);
}
/**
* Change the merge variant across haplotypes for this engine.
*
@ -198,9 +203,9 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods);
if (ref == null || ref.length == 0 ) throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got " + Arrays.toString(ref));
if (refLoc == null || refLoc.size() != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
if (activeRegionWindow == null ) throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow);
if (activeAllelesToGenotype == null ) throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype);
if (genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser must be non-null, got "+genomeLocParser);
if (activeRegionWindow == null ) throw new IllegalArgumentException("activeRegionWindow must be non-null");
if (activeAllelesToGenotype == null ) throw new IllegalArgumentException("activeAllelesToGenotype must be non-null");
if (genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser must be non-null");
// update the haplotypes so we're ready to call, getting the ordered list of positions on the reference
// that carry events among the haplotypes
@ -228,20 +233,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
final VariantContextBuilder vcb = new VariantContextBuilder(mergedVC);
if( mergedVC == null )
continue;
if( mergedVC == null ) { continue; }
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
if (emitReferenceConfidence) {
final List<Allele> alleleList = new ArrayList<>();
alleleList.addAll(mergedVC.getAlleles());
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
vcb.alleles(alleleList);
mergedVC = vcb.make();
}
if (emitReferenceConfidence)
mergedVC = addNonRefSymbolicAllele(mergedVC);
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
@ -264,7 +265,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
readAlleleLikelihoods.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC );
final VariantContext call = calculateGenotypes(null,null,null,null,new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel, false,null);
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) {
readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
@ -494,6 +495,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make();
}
/**
 * Returns a copy of {@code mergedVC} whose allele list has the symbolic non-reference allele
 * ({@link GATKVariantContextUtils#NON_REF_SYMBOLIC_ALLELE}) appended at the end.
 *
 * <p>Used when emitting reference-confidence (gVCF) output, where every site must carry the
 * &lt;NON_REF&gt; allele in addition to the alleles actually observed.</p>
 *
 * @param mergedVC the variant context to extend; never modified by this method.
 * @return a new {@code VariantContext} with the original alleles plus the non-ref symbolic allele.
 */
private VariantContext addNonRefSymbolicAllele(final VariantContext mergedVC) {
final VariantContextBuilder vcb = new VariantContextBuilder(mergedVC);
final List<Allele> originalList = mergedVC.getAlleles();
// Pre-size for the one extra symbolic allele to avoid a resize.
final List<Allele> alleleList = new ArrayList<>(originalList.size() + 1);
alleleList.addAll(mergedVC.getAlleles());
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
vcb.alleles(alleleList);
return vcb.make();
}
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
// used for genotyping.
private ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(
@ -653,22 +664,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
@Requires({"readLikelihoods!= null", "mergedVC != null"})
@Ensures("result != null")
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC ) {
final GenotypesContext genotypes = GenotypesContext.create(readLikelihoods.sampleCount());
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
for (final String sample : readLikelihoods.samples() ) {
final int numHaplotypes = mergedVC.getAlleles().size();
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = PairHMMLikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, readLikelihoods, mergedVC.getAlleles(), true);
int glIndex = 0;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
}
}
logger.debug(" Likelihoods for sample " + sample + " : " + Arrays.toString(genotypeLikelihoods));
genotypes.add(new GenotypeBuilder(sample).alleles(NO_CALL).PL(genotypeLikelihoods).make());
}
return genotypes;
final List<Allele> vcAlleles = mergedVC.getAlleles();
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
final int sampleCount = samples.sampleCount();
final GenotypesContext result = GenotypesContext.create(sampleCount);
for (int s = 0; s < sampleCount; s++)
result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
return result;
}
/**
@ -779,4 +782,22 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return (vc == null ? -1 : vc.getAlleles().hashCode());
}
}
/**
 * Returns the ploidy-model used by this genotyping engine.
 *
 * <p>The model is fixed at construction time (see the constructor, which builds a
 * {@code HomogeneousPloidyModel} from the configured sample ploidy), so the same
 * instance is returned on every call.</p>
 *
 * @return never {@code null}.
 */
public PloidyModel getPloidyModel() {
return ploidyModel;
}
/**
 * Returns the genotyping-model used by this genotyping engine.
 *
 * <p>The model is fixed at construction time, so the same instance is returned on
 * every call; callers (e.g. the reference-confidence code paths) may cache it.</p>
 *
 * @return never {@code null}.
 */
public GenotypingModel getGenotypingModel() {
return genotypingModel;
}
}

View File

@ -49,7 +49,11 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.samtools.SAMUtils;
import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.genotyping.AlleleList;
import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -59,7 +63,6 @@ import org.broadinstitute.gatk.utils.pairhmm.*;
import org.broadinstitute.gatk.utils.recalibration.covariates.RepeatCovariate;
import org.broadinstitute.gatk.utils.recalibration.covariates.RepeatLengthCovariate;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.*;
import java.io.File;
import java.io.FileNotFoundException;
@ -249,12 +252,13 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
}
@Override
public ReadLikelihoods<Haplotype> computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final List<String> samples, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
public ReadLikelihoods<Haplotype> computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final SampleList samples, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
final List<Haplotype> haplotypes = assemblyResultSet.getHaplotypeList();
final List<Haplotype> haplotypeList = assemblyResultSet.getHaplotypeList();
final AlleleList<Haplotype> haplotypes = new IndexedAlleleList<>(haplotypeList);
// configure the HMM
initializePairHMM(haplotypes, perSampleReadList);
initializePairHMM(haplotypeList, perSampleReadList);
// Add likelihoods for each sample's reads to our result
final ReadLikelihoods<Haplotype> result = new ReadLikelihoods<>(samples, haplotypes, perSampleReadList);

View File

@ -46,11 +46,14 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.genotyping.AlleleList;
import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import java.util.HashMap;
import java.util.List;
@ -63,17 +66,15 @@ import java.util.Random;
public class RandomLikelihoodCalculationEngine implements ReadLikelihoodCalculationEngine {
@Override
public ReadLikelihoods computeReadLikelihoods(final AssemblyResultSet assemblyResultSet,
final List<String> samples,
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final AssemblyResultSet assemblyResultSet,
final SampleList samples,
final Map<String, List<GATKSAMRecord>> reads) {
final List<Haplotype> haplotypes = assemblyResultSet.getHaplotypeList();
final AlleleList<Haplotype> haplotypes = new IndexedAlleleList<>(assemblyResultSet.getHaplotypeList());
final ReadLikelihoods result = new ReadLikelihoods(samples, haplotypes, reads);
final Map<Haplotype,Allele> alleles = new HashMap<>(haplotypes.size());
for (final Haplotype haplotype : haplotypes)
alleles.put(haplotype,Allele.create(haplotype,false));
final Map<Haplotype,Allele> alleles = new HashMap<>(haplotypes.alleleCount());
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
final int sampleCount = samples.size();
final int alleleCount = alleles.size();
final int sampleCount = samples.sampleCount();
final int alleleCount = haplotypes.alleleCount();
for (int i = 0; i < sampleCount; i++) {
final List<GATKSAMRecord> sampleReads = result.sampleReads(i);
final int readCount = sampleReads.size();

View File

@ -46,6 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
@ -81,6 +82,7 @@ public interface ReadLikelihoodCalculationEngine {
* active region assembly process.
*
* @param assemblyResultSet the input assembly results.
* @param samples the list of targeted samples.
* @param perSampleReadList the input read sets stratified per sample.
*
* @throws NullPointerException if either parameter is {@code null}.
@ -88,7 +90,7 @@ public interface ReadLikelihoodCalculationEngine {
* @return never {@code null}, with at least one entry for each input sample (the keys in {@code perSampleReadList}).
* The value maps can be potentially empty though.
*/
public ReadLikelihoods<Haplotype> computeReadLikelihoods(AssemblyResultSet assemblyResultSet, List<String> samples,
public ReadLikelihoods<Haplotype> computeReadLikelihoods(AssemblyResultSet assemblyResultSet, SampleList samples,
Map<String, List<GATKSAMRecord>> perSampleReadList);
public void close();

View File

@ -46,6 +46,8 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
/**
* Holds information about a genotype call of a single sample reference vs. any non-ref event
*
@ -58,7 +60,7 @@ final class RefVsAnyResult {
/**
* The genotype likelihoods for ref/ref ref/non-ref non-ref/non-ref
*/
final double[] genotypeLikelihoods = new double[3];
final double[] genotypeLikelihoods;
/**
* AD field value for ref / non-ref
@ -74,7 +76,31 @@ final class RefVsAnyResult {
* Cap the het and hom var likelihood values by the hom ref likelihood.
*/
protected void capByHomRefLikelihood() {
genotypeLikelihoods[1] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[1]);
genotypeLikelihoods[2] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[2]);
final int likelihoodCount = genotypeLikelihoods.length;
for (int i = 1; i < likelihoodCount; i++)
genotypeLikelihoods[i] = Math.min(genotypeLikelihoods[0],genotypeLikelihoods[i]);
}
/**
 * Creates a new ref-vs-alt result assuming 3 as the number of genotype likelihoods (human diploid ploidy).
 *
 * @deprecated use {@link #RefVsAnyResult(int)} and pass the likelihood capacity explicitly;
 *             this constructor hard-codes the diploid case.
 */
@Deprecated
public RefVsAnyResult() {
// For a biallelic site with ploidy p there are p*(p+1)/2 genotypes; ">> 1" is the /2.
genotypeLikelihoods =
new double[(HomoSapiensConstants.DEFAULT_PLOIDY * (HomoSapiensConstants.DEFAULT_PLOIDY + 1)) >> 1];
}
/**
 * Creates a new ref-vs-alt result indicating the genotype likelihood vector capacity.
 *
 * @param likelihoodCapacity the required capacity of the likelihood array; should match the possible number of
 *                           genotypes given the number of alleles (always 2) and the (arbitrary) ploidy, minus
 *                           the genotyping model's non-sense genotype count, if applicable.
 * @throws IllegalArgumentException if {@code likelihoodCapacity} is negative.
 */
public RefVsAnyResult(final int likelihoodCapacity) {
// A zero capacity is tolerated; only a negative one is a programming error.
if (likelihoodCapacity < 0)
throw new IllegalArgumentException("likelihood capacity is negative");
genotypeLikelihoods = new double[likelihoodCapacity];
}
}

View File

@ -51,11 +51,13 @@ import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFSimpleHeaderLine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.locusiterator.LocusIteratorByState;
@ -85,8 +87,8 @@ public class ReferenceConfidenceModel {
public final static String ALTERNATE_ALLELE_STRING = "ALT"; // arbitrary alternate allele
private final GenomeLocParser genomeLocParser;
private final Set<String> samples;
private final SAMFileHeader header; // TODO -- really shouldn't depend on this
private final SampleList samples;
private final int indelInformativeDepthIndelSize;
private final static boolean WRITE_DEBUGGING_BAM = false;
@ -103,18 +105,17 @@ public class ReferenceConfidenceModel {
* @param indelInformativeDepthIndelSize the max size of indels to consider when calculating indel informative depths
*/
public ReferenceConfidenceModel(final GenomeLocParser genomeLocParser,
final Set<String> samples,
final SampleList samples,
final SAMFileHeader header,
final int indelInformativeDepthIndelSize) {
if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");
if ( samples == null ) throw new IllegalArgumentException("samples cannot be null");
if ( samples.isEmpty() ) throw new IllegalArgumentException("samples cannot be empty");
if ( samples.sampleCount() == 0) throw new IllegalArgumentException("samples cannot be empty");
if ( header == null ) throw new IllegalArgumentException("header cannot be empty");
if ( indelInformativeDepthIndelSize < 0) throw new IllegalArgumentException("indelInformativeDepthIndelSize must be >= 1 but got " + indelInformativeDepthIndelSize);
this.genomeLocParser = genomeLocParser;
this.samples = samples;
this.header = header;
this.indelInformativeDepthIndelSize = indelInformativeDepthIndelSize;
if ( WRITE_DEBUGGING_BAM ) {
@ -124,8 +125,6 @@ public class ReferenceConfidenceModel {
} else {
debuggingWriter = null;
}
initializeIndelPLCache();
}
/**
@ -151,7 +150,7 @@ public class ReferenceConfidenceModel {
/**
* Calculate the reference confidence for a single sample given the its read data
*
* Returns a list of variant contexts, one for each position in the activeregion.getLoc(), each containing
* Returns a list of variant contexts, one for each position in the {@code activeRegion.getLoc()}, each containing
* detailed information about the certainty that the sample is hom-ref for each base in the region.
*
*
@ -162,6 +161,8 @@ public class ReferenceConfidenceModel {
* @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
* @param activeRegion the active region we want to get the reference confidence over
* @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
* @param ploidyModel indicate the ploidy of each sample in {@code stratifiedReadMap}.
* @param model genotyping model.
* @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
* correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
* under any position it covers (for snps and insertions that is 1 bp, but for deletions its the entire ref span)
@ -173,6 +174,8 @@ public class ReferenceConfidenceModel {
final GenomeLoc paddedReferenceLoc,
final ActiveRegion activeRegion,
final ReadLikelihoods<Haplotype> readLikelihoods,
final PloidyModel ploidyModel,
final GenotypingModel model,
final List<VariantContext> variantCalls) {
if ( refHaplotype == null ) throw new IllegalArgumentException("refHaplotype cannot be null");
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
@ -182,12 +185,15 @@ public class ReferenceConfidenceModel {
if ( readLikelihoods == null ) throw new IllegalArgumentException("readLikelihoods cannot be null");
if ( readLikelihoods.sampleCount() != 1 ) throw new IllegalArgumentException("readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.sampleCount());
if ( refHaplotype.length() != activeRegion.getExtendedLoc().size() ) throw new IllegalArgumentException("refHaplotype " + refHaplotype.length() + " and activeRegion location size " + activeRegion.getLocation().size() + " are different");
if ( ploidyModel == null) throw new IllegalArgumentException("the ploidy model cannot be null");
if ( model == null) throw new IllegalArgumentException("the genotyping model cannot be null");
final int ploidy = ploidyModel.samplePloidy(0); // the first sample = the only sample in reference-confidence mode.
final GenomeLoc refSpan = activeRegion.getLocation();
final List<ReadBackedPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
final byte[] ref = refHaplotype.getBases();
final List<VariantContext> results = new ArrayList<>(refSpan.size());
final String sampleName = readLikelihoods.sample(0);
final String sampleName = readLikelihoods.sampleAt(0);
final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedLoc().getStart();
for ( final ReadBackedPileup pileup : refPileups ) {
@ -201,20 +207,20 @@ public class ReferenceConfidenceModel {
// otherwise emit a reference confidence variant context
final int refOffset = offset + globalRefOffset;
final byte refBase = ref[refOffset];
final RefVsAnyResult homRefCalc = calcGenotypeLikelihoodsOfRefVsAny(pileup, refBase, (byte)6, null);
final RefVsAnyResult homRefCalc = calcGenotypeLikelihoodsOfRefVsAny(sampleName,ploidy,model,pileup, refBase, (byte)6, null);
homRefCalc.capByHomRefLikelihood();
final Allele refAllele = Allele.create(refBase, true);
final List<Allele> refSiteAlleles = Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final VariantContextBuilder vcb = new VariantContextBuilder("HC", curPos.getContig(), curPos.getStart(), curPos.getStart(), refSiteAlleles);
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Arrays.asList(refAllele, refAllele));
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, GATKVariantContextUtils.homozygousAlleleList(refAllele, ploidy));
gb.AD(homRefCalc.AD_Ref_Any);
gb.DP(homRefCalc.getDP());
// genotype likelihood calculation
final GenotypeLikelihoods snpGLs = GenotypeLikelihoods.fromLog10Likelihoods(homRefCalc.genotypeLikelihoods);
final int nIndelInformativeReads = calcNIndelInformativeReads(pileup, refOffset, ref, indelInformativeDepthIndelSize);
final GenotypeLikelihoods indelGLs = getIndelPLs(nIndelInformativeReads);
final GenotypeLikelihoods indelGLs = getIndelPLs(ploidy,nIndelInformativeReads);
// now that we have the SNP and indel GLs, we take the one with the least confidence,
// as this is the most conservative estimate of our certainty that we are hom-ref.
@ -251,23 +257,51 @@ public class ReferenceConfidenceModel {
* Get indel PLs corresponding to seeing N nIndelInformativeReads at this site
*
* @param nInformativeReads the number of reads that inform us about being ref without an indel at this site
* @param ploidy the requested ploidy.
* @return non-null GenotypeLikelihoods given N
*/
protected final GenotypeLikelihoods getIndelPLs(final int nInformativeReads) {
return indelPLCache[nInformativeReads > MAX_N_INDEL_INFORMATIVE_READS ? MAX_N_INDEL_INFORMATIVE_READS : nInformativeReads];
/**
 * Get indel PLs corresponding to seeing N indel-informative reads at this site.
 *
 * @param ploidy the requested sample ploidy.
 * @param nInformativeReads the number of reads that inform us about being ref without an indel at this site;
 *                          capped at MAX_N_INDEL_INFORMATIVE_READS since GQs are capped at 99 anyway.
 * @return non-null GenotypeLikelihoods given N.
 * @throws IllegalArgumentException if {@code ploidy} cannot be represented by the PL cache.
 */
protected final GenotypeLikelihoods getIndelPLs(final int ploidy, final int nInformativeReads) {
// The cache array has exactly MAX_N_INDEL_PLOIDY rows (valid indices 0 .. MAX_N_INDEL_PLOIDY - 1),
// so ploidy == MAX_N_INDEL_PLOIDY must be rejected too, otherwise indelPLCache() would fail with an
// ArrayIndexOutOfBoundsException. The previous '>' comparison let that value through.
if (ploidy >= MAX_N_INDEL_PLOIDY)
throw new IllegalArgumentException("you have hit a current limitation of the GVCF output model that cannot handle ploidies larger than " + (MAX_N_INDEL_PLOIDY - 1) + ", please let the GATK team know about it: " + ploidy);
return indelPLCache(ploidy, Math.min(nInformativeReads, MAX_N_INDEL_INFORMATIVE_READS));
}
protected static final int MAX_N_INDEL_INFORMATIVE_READS = 40; // more than this is overkill because GQs are capped at 99 anyway
private static final GenotypeLikelihoods[] indelPLCache = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];
private static final int MAX_N_INDEL_PLOIDY = 20;
private static final GenotypeLikelihoods[][] indelPLCache = new GenotypeLikelihoods[MAX_N_INDEL_PLOIDY][];
private static final double INDEL_ERROR_RATE = -4.5; // 10^-4.5 indel errors per bp
private void initializeIndelPLCache() {
for( int nInformativeReads = 0; nInformativeReads <= MAX_N_INDEL_INFORMATIVE_READS; nInformativeReads++ ) {
final double homRef = 0.0;
final double het = MathUtils.LOG_ONE_HALF * nInformativeReads;
final double homVar = INDEL_ERROR_RATE * nInformativeReads;
indelPLCache[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(new double[]{homRef, het, homVar});
/**
 * Fetch the cached indel PL entry for the given ploidy and informative-read count,
 * lazily building the per-ploidy cache row on first use.
 *
 * The row reference is read exactly once into a local to avoid racing a concurrent
 * initialization between two reads of the same array slot. ('final' on a private
 * method is redundant and has been dropped.)
 *
 * NOTE(review): the unsynchronized read of {@code indelPLCache[ploidy]} has no
 * happens-before edge with the synchronized writer, so a row published by another
 * thread may not be seen fully initialized — confirm single-threaded use or make
 * the publication safe (e.g. AtomicReferenceArray).
 *
 * @param ploidy the sample ploidy; assumed already range-checked by the caller.
 * @param nInformativeReads number of informative reads, assumed already capped by the caller.
 * @return the cached (or freshly computed) GenotypeLikelihoods.
 */
private GenotypeLikelihoods indelPLCache(final int ploidy, final int nInformativeReads) {
final GenotypeLikelihoods[] cacheRow = indelPLCache[ploidy];
return cacheRow == null
? initializeIndelPLCache(ploidy)[nInformativeReads]
: cacheRow[nInformativeReads];
}
/**
 * Compute and publish the indel PL cache row for the given ploidy.
 *
 * Synchronized so only one thread performs the computation; re-checks the slot on
 * entry in case another thread initialized it while this one was blocked.
 *
 * @param ploidy the sample ploidy the row is computed for.
 * @return the (now published) row of GenotypeLikelihoods indexed by informative read count.
 */
private synchronized GenotypeLikelihoods[] initializeIndelPLCache(final int ploidy) {
// Double-check whether another thread has done the initialization.
if (indelPLCache[ploidy] != null)
return indelPLCache[ploidy];

final double denominator = - MathUtils.Log10Cache.get(ploidy);
final GenotypeLikelihoods[] result = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];

// Zero informative reads: flat (all-zero) log10-likelihood vector of length ploidy + 1.
result[0] = GenotypeLikelihoods.fromLog10Likelihoods(new double[ploidy + 1]);

// These per-read log10 terms do not depend on the read count or alt count,
// so compute them once instead of on every loop iteration.
final byte indelQual = (byte) Math.round(INDEL_ERROR_RATE * -10);
final double refLikelihood = QualityUtils.qualToProbLog10(indelQual);
final double altLikelihood = QualityUtils.qualToErrorProbLog10(indelQual);

for (int nInformativeReads = 1; nInformativeReads <= MAX_N_INDEL_INFORMATIVE_READS; nInformativeReads++) {
final double[] PLs = new double[ploidy + 1];
PLs[0] = nInformativeReads * refLikelihood;
for (int altCount = 1; altCount <= ploidy; altCount++) {
// log10 mixture of ref and alt per-read likelihoods, weighted by allele counts
// and normalized by the ploidy (denominator), then scaled by the read count.
final double refLikelihoodAccum = refLikelihood + MathUtils.Log10Cache.get(ploidy - altCount);
final double altLikelihoodAccum = altLikelihood + MathUtils.Log10Cache.get(altCount);
PLs[altCount] = nInformativeReads * (MathUtils.approximateLog10SumLog10(refLikelihoodAccum, altLikelihoodAccum) + denominator);
}
result[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(PLs);
}
indelPLCache[ploidy] = result;
return result;
}
/**
@ -279,6 +313,7 @@ public class ReferenceConfidenceModel {
* @param hqSoftClips running average data structure (can be null) to collect information about the number of high quality soft clips
* @return a RefVsAnyResult genotype call
*/
@Deprecated
public RefVsAnyResult calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final MathUtils.RunningAverage hqSoftClips) {
final RefVsAnyResult result = new RefVsAnyResult();
@ -305,6 +340,73 @@ public class ReferenceConfidenceModel {
return result;
}
/**
 * Calculate the genotype likelihoods for the sample in pileup for being hom-ref contrasted with being ref vs. alt
 *
 * @param sampleName target sample name.
 * @param ploidy target sample ploidy.
 * @param genotypingModel model to calculate likelihoods and genotypes.
 * @param pileup the read backed pileup containing the data we want to evaluate
 * @param refBase the reference base at this pileup position
 * @param minBaseQual the min base quality for a read in the pileup at the pileup position to be included in the calculation
 * @param hqSoftClips running average data structure (can be null) to collect information about the number of high quality soft clips
 * @return a RefVsAnyResult genotype call.
 */
public RefVsAnyResult calcGenotypeLikelihoodsOfRefVsAny(final String sampleName, final int ploidy,
final GenotypingModel genotypingModel,
final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final MathUtils.RunningAverage hqSoftClips) {
// Two-allele list: the reference base and the symbolic non-ref allele.
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(Allele.create(refBase,true),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
// Notice that the sample name is rather irrelevant as this information is never used, it just needs to be the same in both lines below.
final int maximumReadCount = pileup.getReads().size();
final List<GATKSAMRecord> reads = new ArrayList<>(maximumReadCount);
// likelihoods[alleleIndex][readIndex]; only the first nextIndex columns end up populated.
final double[][] likelihoods = new double[2][maximumReadCount];
final int[] adCounts = new int[2];
int nextIndex = 0;
for (final PileupElement p : pileup) {
final byte qual = p.isDeletion() ? REF_MODEL_DELETION_QUAL : p.getQual();
// Low-quality non-deletion bases are excluded from the calculation entirely.
if (!p.isDeletion() && qual <= minBaseQual)
continue;
final GATKSAMRecord read = p.getRead();
reads.add(read);
// A pileup element counts as "alt" if it mismatches the reference or shows any
// indel/soft-clip context around this position.
final boolean isAlt = p.getBase() != refBase || p.isDeletion() || p.isBeforeDeletionStart()
|| p.isAfterDeletionEnd() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip();
final int bestAllele;
final int worstAllele;
if (isAlt) {
bestAllele = 1;
worstAllele = 0;
} else {
bestAllele = 0;
worstAllele = 1;
}
// Best allele gets the base-quality log10 probability; the other gets the error
// probability split across the three possible wrong bases (LOG_ONE_THIRD).
likelihoods[bestAllele][nextIndex] = QualityUtils.qualToProbLog10(qual);
likelihoods[worstAllele][nextIndex++] = QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD;
adCounts[bestAllele]++;
if (isAlt && hqSoftClips != null && p.isNextToSoftClip())
hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) 28));
}
// Pack the per-read likelihoods into a ReadLikelihoods matrix for the genotyping model.
final Map<String,List<GATKSAMRecord>> sampleToReads = Collections.singletonMap(sampleName,reads);
final ReadLikelihoods<Allele> readLikelihoods = new ReadLikelihoods<>(new IndexedSampleList(sampleName),alleleList,sampleToReads);
final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = readLikelihoods.sampleMatrix(0);
final int readCount = sampleLikelihoods.readCount();
for (int i = 0; i < readCount; i++) {
sampleLikelihoods.set(0,i,likelihoods[0][i]);
sampleLikelihoods.set(1,i,likelihoods[1][i]);
}
// Single-sample homogeneous ploidy model; delegate the genotype likelihood math to the model.
final PloidyModel ploidyModel = new HomogeneousPloidyModel(new IndexedSampleList(sampleName),ploidy);
final GenotypingLikelihoods<Allele> genotypingLikelihoods = genotypingModel.calculateLikelihoods(alleleList, new GenotypingData<>(ploidyModel, readLikelihoods));
final double[] genotypeLikelihoodArray = genotypingLikelihoods.sampleLikelihoods(0).getAsVector();
final RefVsAnyResult result = new RefVsAnyResult(genotypeLikelihoodArray.length);
System.arraycopy(genotypeLikelihoodArray,0,result.genotypeLikelihoods,0,genotypeLikelihoodArray.length);
System.arraycopy(adCounts,0,result.AD_Ref_Any,0,2);
return result;
}
/**
* Get a list of pileups that span the entire active region span, in order, one for each position
*/
@ -330,7 +432,7 @@ public class ReferenceConfidenceModel {
debuggingWriter.addAlignment(read);
final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
true, genomeLocParser, samples, false);
true, genomeLocParser, SampleListUtils.asSet(samples), false);
final List<ReadBackedPileup> pileups = new LinkedList<>();
final int startPos = activeRegionSpan.getStart();

View File

@ -49,6 +49,9 @@ package org.broadinstitute.gatk.tools.walkers.indels;
import com.google.java.contract.Ensures;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.genotyping.AlleleList;
import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.clipping.ReadClipper;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -444,10 +447,10 @@ public class PairHMMIndelErrorModel {
// Apparently more than one allele can map to the same haplotype after trimming
final Set<Haplotype> distinctHaplotypesSet = new LinkedHashSet<>(trimmedHaplotypeMap.values());
final List<Haplotype> distinctHaplotypesList = Arrays.asList(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
final AlleleList<Haplotype> distinctHaplotypesList = new IndexedAlleleList<>(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
// Get the likelihoods for our clipped read against each of our trimmed haplotypes.
final ReadLikelihoods<Haplotype> rl = new ReadLikelihoods<>(
Collections.singletonList("DUMMY_SAMPLE"),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
new IndexedSampleList(Collections.singletonList("DUMMY_SAMPLE")),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
final ReadLikelihoods.Matrix<Haplotype> dummySampleLikelihoods = rl.sampleMatrix(0);
pairHMM.computeLikelihoods(rl.sampleMatrix(0), Collections.singletonList(processedRead), readGCPArrayMap);

View File

@ -46,6 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.validation;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -349,13 +350,14 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
if (emitConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
if (callConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
final GenomeAnalysisEngine toolkit = getToolkit();
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
snpEngine = new UnifiedGenotypingEngine(getToolkit(), uac);
snpEngine = new UnifiedGenotypingEngine(uac,toolkit);
// Adding the INDEL calling arguments for UG
UnifiedArgumentCollection uac_indel = uac.clone();
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotypingEngine(getToolkit(), uac_indel);
indelEngine = new UnifiedGenotypingEngine(uac_indel,toolkit);
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;

View File

@ -46,8 +46,12 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
@ -111,6 +115,7 @@ import java.util.*;
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-10,stop=10))
@SuppressWarnings("unused")
public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWriter> implements AnnotatorCompatible, TreeReducible<VariantContextWriter> {
/**
@ -159,12 +164,13 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )
variants.addAll(variantCollection.getRodBindings());
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants);
final Set<String> samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
final GenomeAnalysisEngine toolkit = getToolkit();
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
// create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), createUAC(), samples);
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
// create the annotation engine
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, getToolkit());
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, toolkit);
// take care of the VCF headers
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
@ -179,7 +185,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
final VCFHeader vcfHeader = new VCFHeader(headerLines, samples);
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet);
vcfWriter.writeHeader(vcfHeader);
}

View File

@ -46,6 +46,10 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -120,14 +124,18 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
UAC.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
UG_engine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
final GenomeAnalysisEngine toolkit = getToolkit();
final SampleList samples =
new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)));
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));
hInfo.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
vcfWriter.writeHeader(new VCFHeader(hInfo, samples));
vcfWriter.writeHeader(new VCFHeader(hInfo, sampleNameSet));
}
/**

View File

@ -46,15 +46,14 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@ -70,9 +69,7 @@ public class GVCFWriter implements VariantContextWriter {
//
// static VCF field names
//
protected final static String BLOCK_SIZE_INFO_FIELD = "BLOCK_SIZE";
protected final static String MIN_DP_FORMAT_FIELD = "MIN_DP";
protected final static String MIN_GQ_FORMAT_FIELD = "MIN_GQ";
//
// Final fields initialized in constructor
@ -87,6 +84,7 @@ public class GVCFWriter implements VariantContextWriter {
String contigOfNextAvailableStart = null;
private String sampleName = null;
private HomRefBlock currentBlock = null;
private final int defaultPloidy;
/**
* Is the proposed GQ partitions well-formed?
@ -94,7 +92,7 @@ public class GVCFWriter implements VariantContextWriter {
* @param GQPartitions proposed GQ partitions
* @return a non-null string if something is wrong (string explains issue)
*/
protected static List<HomRefBlock> parsePartitions(final List<Integer> GQPartitions) {
protected static List<HomRefBlock> parsePartitions(final List<Integer> GQPartitions, final int defaultPloidy) {
if ( GQPartitions == null ) throw new IllegalArgumentException("GQpartitions cannot be null");
if ( GQPartitions.isEmpty() ) throw new IllegalArgumentException("GQpartitions cannot be empty");
@ -104,10 +102,10 @@ public class GVCFWriter implements VariantContextWriter {
if ( value == null ) throw new IllegalArgumentException("GQPartitions contains a null integer");
if ( value < lastThreshold ) throw new IllegalArgumentException("GQPartitions is out of order. Last is " + lastThreshold + " but next is " + value);
if ( value == lastThreshold ) throw new IllegalArgumentException("GQPartitions is equal elements: Last is " + lastThreshold + " but next is " + value);
result.add(new HomRefBlock(lastThreshold, value));
result.add(new HomRefBlock(lastThreshold, value,defaultPloidy));
lastThreshold = value;
}
result.add(new HomRefBlock(lastThreshold, Integer.MAX_VALUE));
result.add(new HomRefBlock(lastThreshold, Integer.MAX_VALUE,defaultPloidy));
return result;
}
@ -128,11 +126,13 @@ public class GVCFWriter implements VariantContextWriter {
*
* @param underlyingWriter the ultimate destination of the GVCF records
* @param GQPartitions a well-formed list of GQ partitions
* @param defaultPloidy the assumed ploidy for input variant context without one.
*/
public GVCFWriter(final VariantContextWriter underlyingWriter, final List<Integer> GQPartitions) {
public GVCFWriter(final VariantContextWriter underlyingWriter, final List<Integer> GQPartitions, final int defaultPloidy) {
if ( underlyingWriter == null ) throw new IllegalArgumentException("underlyingWriter cannot be null");
this.underlyingWriter = underlyingWriter;
this.GQPartitions = parsePartitions(GQPartitions);
this.GQPartitions = parsePartitions(GQPartitions,defaultPloidy);
this.defaultPloidy = defaultPloidy;
}
/**
@ -148,10 +148,6 @@ public class GVCFWriter implements VariantContextWriter {
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
// These annotations are no longer standard
//header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block"));
//header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block"));
for ( final HomRefBlock partition : GQPartitions ) {
header.addMetaDataLine(partition.toVCFHeaderLine());
}
@ -188,27 +184,30 @@ public class GVCFWriter implements VariantContextWriter {
* @return a VariantContext to be emitted, or null if non is appropriate
*/
protected VariantContext addHomRefSite(final VariantContext vc, final Genotype g) {
if ( nextAvailableStart != -1 ) {
// don't create blocks while the hom-ref site falls before nextAvailableStart (for deletions)
if ( vc.getStart() <= nextAvailableStart && vc.getChr().equals(contigOfNextAvailableStart) ) {
if ( vc.getStart() <= nextAvailableStart && vc.getChr().equals(contigOfNextAvailableStart) )
return null;
}
// otherwise, reset to non-relevant
nextAvailableStart = -1;
contigOfNextAvailableStart = null;
}
if ( currentBlock == null ) {
currentBlock = createNewBlock(vc, g);
return null;
} else if ( currentBlock.withinBounds(g.getGQ()) ) {
final VariantContext result;
if (genotypeCanBeMergedInCurrentBlock(g)) {
currentBlock.add(vc.getStart(), g);
return null;
result = null;
} else {
final VariantContext result = blockToVCF(currentBlock);
result = blockToVCF(currentBlock);
currentBlock = createNewBlock(vc, g);
return result;
}
return result;
}
/**
 * Determines whether a hom-ref genotype can be appended to the current open block.
 *
 * Requires: an open block, the genotype's GQ within the block's GQ band, matching
 * ploidy, and — when both the block and the genotype carry PLs — PL vectors of the
 * same length (so the block's running minimum can be updated element-wise).
 *
 * @param g the genotype to test.
 * @return true if {@code g} is compatible with the current block.
 */
private boolean genotypeCanBeMergedInCurrentBlock(final Genotype g) {
return currentBlock != null && currentBlock.withinBounds(g.getGQ()) && currentBlock.getPloidy() == g.getPloidy()
&& (currentBlock.getMinPLs() == null || !g.hasPL() || (currentBlock.getMinPLs().length == g.getPL().length));
}
/**
@ -226,21 +225,20 @@ public class GVCFWriter implements VariantContextWriter {
* Convert a HomRefBlock into a VariantContext
*
* @param block the block to convert
* @return a VariantContext representing the gVCF encoding for this block
* @return a VariantContext representing the gVCF encoding for this block.
* It will return {@code null} if input {@code block} is {@code null}, indicating that there
* is no variant-context to be output into the VCF.
*/
private VariantContext blockToVCF(final HomRefBlock block) {
if ( block == null ) throw new IllegalArgumentException("block cannot be null");
if ( block == null ) return null;
final VariantContextBuilder vcb = new VariantContextBuilder(block.getStartingVC());
vcb.attributes(new HashMap<String, Object>(2)); // clear the attributes
vcb.stop(block.getStop());
vcb.attribute(VCFConstants.END_KEY, block.getStop());
// This annotation is no longer standard
//vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize());
// create the single Genotype with GQ and DP annotations
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef()));
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, GATKVariantContextUtils.homozygousAlleleList(block.getRef(),block.getPloidy()));
gb.noAD().noPL().noAttributes(); // clear all attributes
gb.GQ(block.getMedianGQ());
gb.DP(block.getMedianDP());
@ -269,10 +267,12 @@ public class GVCFWriter implements VariantContextWriter {
break;
}
}
if ( partition == null ) throw new IllegalStateException("GQ " + g + " from " + vc + " didn't fit into any partition " + partition);
if ( partition == null )
throw new IllegalStateException("GQ " + g + " from " + vc + " didn't fit into any partition");
// create the block, add g to it, and return it for use
final HomRefBlock block = new HomRefBlock(vc, partition.getGQLowerBound(), partition.getGQUpperBound());
final HomRefBlock block = new HomRefBlock(vc, partition.getGQLowerBound(), partition.getGQUpperBound(), defaultPloidy);
block.add(vc.getStart(), g);
return block;
}

View File

@ -46,11 +46,11 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.broadinstitute.gatk.utils.MathUtils;
import java.util.ArrayList;
import java.util.List;
@ -75,6 +75,7 @@ final class HomRefBlock {
final private List<Integer> GQs = new ArrayList<>(100);
final private List<Integer> DPs = new ArrayList<>(100);
private final Allele ref;
private final int ploidy;
/**
* Create a new HomRefBlock
@ -83,7 +84,7 @@ final class HomRefBlock {
* @param minGQ the minGQ (inclusive) to use in this band
* @param maxGQ the maxGQ (exclusive) to use in this band
*/
public HomRefBlock(final VariantContext startingVC, int minGQ, int maxGQ) {
public HomRefBlock(final VariantContext startingVC, final int minGQ, final int maxGQ, final int defaultPloidy) {
if ( startingVC == null ) throw new IllegalArgumentException("startingVC cannot be null");
if ( minGQ > maxGQ ) throw new IllegalArgumentException("bad minGQ " + minGQ + " as its > maxGQ " + maxGQ);
@ -92,6 +93,7 @@ final class HomRefBlock {
this.ref = startingVC.getReference();
this.minGQ = minGQ;
this.maxGQ = maxGQ;
this.ploidy = startingVC.getMaxPloidy(defaultPloidy);
}
/**
@ -100,7 +102,7 @@ final class HomRefBlock {
* @param minGQ the minGQ (inclusive) to use in this band
* @param maxGQ the maxGQ (exclusive) to use in this band
*/
public HomRefBlock(int minGQ, int maxGQ) {
public HomRefBlock(final int minGQ, final int maxGQ, final int ploidy) {
if ( minGQ > maxGQ ) throw new IllegalArgumentException("bad minGQ " + minGQ + " as its > maxGQ " + maxGQ);
this.startingVC = null;
@ -108,6 +110,7 @@ final class HomRefBlock {
this.ref = null;
this.minGQ = minGQ;
this.maxGQ = maxGQ;
this.ploidy = ploidy;
}
/**
@ -119,19 +122,18 @@ final class HomRefBlock {
if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field");
if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field");
if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop);
if ( g.getPloidy() != ploidy)
throw new IllegalArgumentException("cannot add a genotype with a different ploidy: " + g.getPloidy() + " != " + ploidy);
if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs
if( minPLs == null )
minPLs = g.getPL();
else { // otherwise take the min with the provided genotype's PLs
final int[] PL = g.getPL();
if( PL.length == 3 ) {
minPLs = PL.clone();
}
} else { // otherwise take the min with the provided genotype's PLs
final int[] PL = g.getPL();
if( PL.length == 3 ) {
minPLs[0] = Math.min(minPLs[0], PL[0]);
minPLs[1] = Math.min(minPLs[1], PL[1]);
minPLs[2] = Math.min(minPLs[2], PL[2]);
}
if (PL.length != minPLs.length)
throw new IllegalStateException("trying to merge different PL array sizes: " + PL.length + " != " + minPLs.length);
for (int i = 0; i < PL.length; i++)
if (minPLs[i] > PL[i])
minPLs[i] = PL[i];
}
stop = pos;
GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission
@ -182,4 +184,12 @@ final class HomRefBlock {
public VCFHeaderLine toVCFHeaderLine() {
return new VCFHeaderLine("GVCFBlock", "minGQ=" + getGQLowerBound() + "(inclusive),maxGQ=" + getGQUpperBound() + "(exclusive)");
}
/**
 * Get the ploidy of this hom-ref block.
 * @return the ploidy shared by all genotypes in this block.
 */
public int getPloidy() {
return ploidy;
}
}

View File

@ -48,6 +48,9 @@ package org.broadinstitute.gatk.utils.haplotype;
import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.genotyping.AlleleList;
import org.broadinstitute.gatk.genotyping.AlleleListUtils;
import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.PairHMMLikelihoodCalculationEngine;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
@ -77,7 +80,9 @@ public class HaplotypeLDCalculator {
@SuppressWarnings("unchecked")
protected HaplotypeLDCalculator() {
haplotypes = Collections.emptyList();
readLikelihoods = new ReadLikelihoods<>((List<String>)Collections.EMPTY_LIST, (List<Haplotype>)Collections.EMPTY_LIST, Collections.EMPTY_MAP);
final AlleleList<Haplotype> alleleList = AlleleListUtils.emptyList();
readLikelihoods = new ReadLikelihoods<>(SampleListUtils.emptyList(),
alleleList, Collections.EMPTY_MAP);
}
public HaplotypeLDCalculator(final List<Haplotype> haplotypes, final ReadLikelihoods<Haplotype> haplotypeReadMap) {

View File

@ -50,15 +50,17 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
// the imports for unit testing.
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
@ -76,7 +78,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
engine.setArguments(new GATKArgumentCollection());
final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
final Set<String> fakeSamples = Collections.singleton("fake");
final SampleList fakeSamples = SampleListUtils.singletonList("fake");
ugEngine = new UnifiedGenotypingEngine(engine, args,fakeSamples);
}

View File

@ -47,6 +47,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.utils.pairhmm.ActiveRegionTestDataSet;
import org.broadinstitute.gatk.utils.pairhmm.FastLoglessPairHMM;
import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
@ -112,7 +113,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
public void timeGraphBasedLikelihoods(final int reps) {
for (int i = 0; i < reps; i++) {
final GraphBasedLikelihoodCalculationEngineInstance rtlce = new GraphBasedLikelihoodCalculationEngineInstance(dataSet.assemblyResultSet(), new FastLoglessPairHMM((byte)10),Double.NEGATIVE_INFINITY,HeterogeneousKmerSizeResolution.COMBO_MAX);
rtlce.computeReadLikelihoods(dataSet.haplotypeList(), Collections.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
rtlce.computeReadLikelihoods(dataSet.haplotypeList(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}
@ -121,7 +122,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
for (int i = 0; i < reps; i++) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), Collections.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}

View File

@ -47,6 +47,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.HaplotypeGraph;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
@ -262,7 +263,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
dataSet = (ActiveRegionTestDataSet) params[0];
if (INTRODUCE_READ_ERRORS) dataSet.introduceErrors(new Random(13));
graphEngine = new GraphBasedLikelihoodCalculationEngineInstance(dataSet.assemblyResultSet(),hmm,Double.NEGATIVE_INFINITY, HeterogeneousKmerSizeResolution.COMBO_MAX);
graphLks = graphEngine.computeReadLikelihoods(dataSet.haplotypeList(),Collections.singletonList("anonymous"),Collections.singletonMap("anonymous",dataSet.readList())).toPerReadAlleleLikelihoodMap(0);
graphLks = graphEngine.computeReadLikelihoods(dataSet.haplotypeList(), SampleListUtils.singletonList("anonymous"),Collections.singletonMap("anonymous",dataSet.readList())).toPerReadAlleleLikelihoodMap(0);
// clip reads at the anchors.
final Map<GATKSAMRecord,GATKSAMRecord> clippedReads = anchorClippedReads(graphEngine.getHaplotypeGraph(),dataSet.readList());
@ -272,7 +273,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
clippedReadList.add(clippedReads.containsKey(r) ? clippedReads.get(r) : r);
}
loglessLks = fullPairHMM.computeReadLikelihoods(dataSet.assemblyResultSet(),Collections.singletonList("anonymous"),Collections.singletonMap("anonymous",clippedReadList)).toPerReadAlleleLikelihoodMap(0);
loglessLks = fullPairHMM.computeReadLikelihoods(dataSet.assemblyResultSet(),SampleListUtils.singletonList("anonymous"),Collections.singletonMap("anonymous",clippedReadList)).toPerReadAlleleLikelihoodMap(0);
// Change clipped by unclipped in the resulting likelihood map.
for (final GATKSAMRecord r : clippedReads.keySet()) {

View File

@ -47,11 +47,12 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.samtools.SAMFileHeader;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.UnvalidatingGenomeLoc;
import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.GenotypeType;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -61,7 +62,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
@ -75,7 +76,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final String RGID = "ID1";
GATKSAMReadGroupRecord rg;
final String sample = "NA12878";
final Set<String> samples = Collections.singleton(sample);
final SampleList samples = SampleListUtils.singletonList(sample);
SAMFileHeader header;
ReferenceConfidenceModel model;
@ -179,12 +180,12 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
@Test
public void testIndelLikelihoods() {
GenotypeLikelihoods prev = model.getIndelPLs(0);
GenotypeLikelihoods prev = model.getIndelPLs(HomoSapiensConstants.DEFAULT_PLOIDY,0);
Assert.assertEquals(prev.getAsPLs(), new int[]{0, 0, 0});
Assert.assertEquals(-10 * prev.getLog10GQ(GenotypeType.HOM_REF), 0.0);
for ( int i = 1; i <= ReferenceConfidenceModel.MAX_N_INDEL_INFORMATIVE_READS; i++ ) {
final GenotypeLikelihoods current = model.getIndelPLs(i);
final GenotypeLikelihoods current = model.getIndelPLs(HomoSapiensConstants.DEFAULT_PLOIDY,i);
final double prevGQ = -10 * prev.getLog10GQ(GenotypeType.HOM_REF);
final double currGQ = -10 * current.getLog10GQ(GenotypeType.HOM_REF);
Assert.assertTrue(prevGQ < currGQ, "GQ Failed with prev " + prev + " curr " + current + " at " + i);
@ -288,10 +289,12 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
data.getActiveRegion().add(data.makeRead(0, data.getRefLength()));
}
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final List<Integer> expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
@ -304,12 +307,14 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final List<Haplotype> haplotypes = Arrays.asList(data.getRefHap());
final List<VariantContext> calls = Collections.emptyList();
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
data.getActiveRegion().add(data.makeRead(start, readLen));
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final List<Integer> expectedDPs = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getLocation().size(), 0));
for ( int i = start; i < readLen + start; i++ ) expectedDPs.set(i, 1);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
}
@ -340,10 +345,12 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
data.getActiveRegion().add(data.makeRead(0, data.getRefLength()));
}
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final List<Integer> expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
}

View File

@ -48,6 +48,8 @@ package org.broadinstitute.gatk.utils.genotyper;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
@ -72,7 +74,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testInstantiationAndQuery(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
Assert.assertEquals(result.sampleCount(), samples.length);
Assert.assertEquals(result.alleleCount(), alleles.length);
@ -85,7 +87,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testLikelihoodFillingAndQuery(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] likelihoods = fillWithRandomLikelihoods(samples, alleles, result);
testLikelihoodMatrixQueries(samples, result, likelihoods);
}
@ -106,7 +108,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testBestAlleles(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples,alleles,original);
final int alleleCount = alleles.length;
for (int s = 0; s < samples.length; s++) {
@ -146,7 +148,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testBestAlleleMap(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples,alleles,original);
final Map<Allele,List<GATKSAMRecord>> expected = new HashMap<>(alleles.length);
for (final Allele allele : alleles)
@ -171,7 +173,7 @@ public class ReadLikelihoodsUnitTest
}
}
if ((bestAlleleLk - secondBestAlleleLk) > ReadLikelihoods.BestAllele.INFORMATIVE_THRESHOLD)
expected.get(alleles[bestAlleleIndex]).add(sampleMatrix.read(r));
expected.get(alleles[bestAlleleIndex]).add(sampleMatrix.readAt(r));
}
}
@ -189,7 +191,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testFilterPoorlyModeledReads(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
for (int s = 0; s < samples.length; s++) {
final int sampleReadCount = original.sampleReadCount(s);
@ -220,7 +222,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testFilterReadsToOverlap(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final GenomeLoc evenReadOverlap = locParser.createGenomeLoc(SAM_HEADER.getSequenceDictionary().getSequences().get(0).getSequenceName(),EVEN_READ_START ,EVEN_READ_START );
fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.clone();
@ -231,7 +233,7 @@ public class ReadLikelihoodsUnitTest
newLikelihoods[s][a] = new double[(original.sampleReadCount(s) + 1) / 2];
final ReadLikelihoods.Matrix<Allele> sampleMatrix = original.sampleMatrix(s);
for (int r = 0; r < newLikelihoods[s][a].length; r++) {
Assert.assertEquals(result.readIndex(s,sampleMatrix.read(r << 1)),r);
Assert.assertEquals(result.readIndex(s,sampleMatrix.readAt(r << 1)),r);
newLikelihoods[s][a][r] = sampleMatrix.get(a, r << 1);
}
}
@ -240,14 +242,14 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "marginalizationDataSets")
public void testMarginalizationWithOverlap(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads, final Map<Allele,List<Allele>> newToOldAlleleMapping) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final GenomeLoc evenReadOverlap = locParser.createGenomeLoc(SAM_HEADER.getSequenceDictionary().getSequences().get(0).getSequenceName(),EVEN_READ_START ,EVEN_READ_START );
fillWithRandomLikelihoods(samples, alleles, original);
final ReadLikelihoods<Allele> marginalized = original.marginalize(newToOldAlleleMapping,evenReadOverlap);
Assert.assertNotNull(marginalized);
Assert.assertEquals(newToOldAlleleMapping.size(),marginalized.alleleCount());
for (int a = 0; a < marginalized.alleleCount(); a++) {
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.allele(a));
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.alleleAt(a));
Assert.assertNotNull(oldAlleles);
for (int s = 0; s < samples.length; s++) {
final ReadLikelihoods.Matrix<Allele> oldSmapleLikelihoods = original.sampleMatrix(s);
@ -268,13 +270,13 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "marginalizationDataSets")
public void testMarginalization(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads, final Map<Allele,List<Allele>> newToOldAlleleMapping) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples, alleles, original);
final ReadLikelihoods<Allele> marginalized = original.marginalize(newToOldAlleleMapping);
Assert.assertNotNull(marginalized);
Assert.assertEquals(newToOldAlleleMapping.size(),marginalized.alleleCount());
for (int a = 0; a < marginalized.alleleCount(); a++) {
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.allele(a));
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.alleleAt(a));
Assert.assertNotNull(oldAlleles);
for (int s = 0; s < samples.length; s++) {
final ReadLikelihoods.Matrix<Allele> oldSmapleLikelihoods = original.sampleMatrix(s);
@ -295,7 +297,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeBestToZero(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result= original.clone();
result.normalizeLikelihoods(true, Double.NEGATIVE_INFINITY);
@ -321,7 +323,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeCapWorstLK(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result= original.clone();
result.normalizeLikelihoods(false, - 0.001);
@ -354,7 +356,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeCapWorstLKAndBestToZero(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result= original.clone();
result.normalizeLikelihoods(true, - 0.001);
@ -390,7 +392,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testAddMissingAlleles(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.clone();
@ -411,8 +413,8 @@ public class ReadLikelihoodsUnitTest
Assert.assertEquals(original.alleleCount() + 1, result.alleleCount());
// We add two more amongst existing alleles:
result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.allele(1),
result.allele(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.alleleAt(1),
result.alleleAt(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
Assert.assertEquals(original.alleleCount()+3,result.alleleCount());
@ -439,7 +441,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testAddNonRefAllele(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.clone();
result.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
@ -473,13 +475,13 @@ public class ReadLikelihoodsUnitTest
private void testLikelihoodMatrixQueries(String[] samples, ReadLikelihoods<Allele> result, final double[][][] likelihoods) {
for (final String sample : samples) {
final int sampleIndex = result.sampleIndex(sample);
final double[][] likelihoodMatrix = result.sampleValues(sampleIndex);
final int sampleReadCount = result.sampleReadCount(sampleIndex);
Assert.assertEquals(result.alleleCount(), likelihoodMatrix.length);
for (int a = 0; a < likelihoodMatrix.length; a++) {
Assert.assertEquals(likelihoodMatrix[a].length,sampleReadCount);
final int alleleCount = result.alleleCount();
Assert.assertEquals(result.alleleCount(), alleleCount);
for (int a = 0; a < alleleCount; a++) {
Assert.assertEquals(result.sampleReadCount(0),sampleReadCount);
for (int r = 0; r < sampleReadCount; r++)
Assert.assertEquals(likelihoodMatrix[a][r],
Assert.assertEquals(result.sampleMatrix(0).get(a,r),
likelihoods == null ? 0.0 : likelihoods[sampleIndex][a][r], EPSILON);
}
}

View File

@ -46,14 +46,14 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceModel;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@ -100,21 +100,21 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testHeaderWriting() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.writeHeader(new VCFHeader());
Assert.assertTrue(mockWriter.headerWritten);
}
@Test
public void testClose() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.close();
Assert.assertTrue(mockWriter.closed);
}
@Test
public void testCloseWithoutClosingUnderlyingWriter() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.close(false);
Assert.assertFalse(mockWriter.closed);
}
@ -164,7 +164,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCloseEmitsLastVariant() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 0);
@ -176,7 +176,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCloseDoesntEmitsLastVariantWhenNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@ -188,7 +188,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@ -204,7 +204,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryToLowerPositionsRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 30, 30));
writer.add(makeHomRef("20", 31, 30));
@ -220,7 +220,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 20, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@ -235,7 +235,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@ -248,7 +248,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryNonRefThenNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@ -283,7 +283,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testVariantForcesNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@ -300,7 +300,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testEmittingTwoBands() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -315,7 +315,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testNonContiguousBlocks() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -329,7 +329,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testDeletion() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -347,7 +347,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testHomRefAlt() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, 2);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -383,7 +383,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test(dataProvider = "BandPartitionData")
public void testMyData(final List<Integer> partitions, final boolean expectedGood) {
try {
GVCFWriter.parsePartitions(partitions);
GVCFWriter.parsePartitions(partitions,2);
Assert.assertTrue(expectedGood, "Expected to fail but didn't");
} catch ( Exception e ) {
Assert.assertTrue(! expectedGood, "Expected to succeed but failed with message " + e.getMessage());

View File

@ -46,11 +46,11 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.BaseTest;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.utils.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@ -58,7 +58,6 @@ import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class HomRefBlockUnitTest extends BaseTest {
@ -71,7 +70,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBasicConstruction() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, 2);
Assert.assertSame(band.getStartingVC(), vc);
Assert.assertEquals(band.getRef(), vc.getReference());
Assert.assertEquals(band.getGQLowerBound(), 10);
@ -86,7 +85,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testMinMedian() {
//TODO - might be better to make this test use a data provider?
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
int pos = vc.getStart();
@ -117,7 +116,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBigGQIsCapped() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
band.add(vc.getStart(), gb.DP(1000).GQ(1000).PL(new int[]{0,10,100}).make());
@ -126,7 +125,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBadAdd() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
band.add(vc.getStart() + 10, gb.DP(10).GQ(11).PL(new int[]{0,10,100}).make());
@ -156,7 +155,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test(dataProvider = "ContiguousData")
public void testIsContiguous(final String contig, final int pos, final boolean expected) {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final VariantContext testVC = new VariantContextBuilder(vc).chr(contig).start(pos).stop(pos).make();
Assert.assertEquals(band.isContiguous(testVC), expected);
}

View File

@ -26,14 +26,13 @@
package org.broadinstitute.gatk.engine;
import com.google.java.contract.Ensures;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.ValidationExclusion;
import org.broadinstitute.gatk.engine.datasources.reads.*;
@ -55,8 +54,12 @@ import org.broadinstitute.gatk.engine.refdata.utils.RMDTriplet;
import org.broadinstitute.gatk.engine.resourcemanagement.ThreadAllocation;
import org.broadinstitute.gatk.engine.samples.SampleDB;
import org.broadinstitute.gatk.engine.samples.SampleDBBuilder;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
@ -64,7 +67,6 @@ import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter;
import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet;
import org.broadinstitute.gatk.utils.text.XReadLines;
import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor;
import htsjdk.variant.vcf.VCFConstants;
import java.io.File;
import java.io.FileNotFoundException;
@ -1138,7 +1140,7 @@ public class GenomeAnalysisEngine {
* Returns data source objects encapsulating all rod data;
* individual rods can be accessed through the returned data source objects.
*
* @return the rods data sources
* @return the rods data sources, never {@code null}.
*/
public List<ReferenceOrderedDataSource> getRodDataSources() {
return this.rodDataSources;
@ -1254,4 +1256,20 @@ public class GenomeAnalysisEngine {
runtimeLimitInNanoseconds = TimeUnit.NANOSECONDS.convert(args.maxRuntime, args.maxRuntimeUnits);
}
}
/**
 * Returns the sample list including all samples.
 *
 * <p>Backed by the engine's sample DB, so it covers every sample known to the
 * run, not just those present in the read inputs (contrast with
 * {@code getReadSampleList()}).</p>
 *
 * @return never {@code null}.
 */
public SampleList getSampleList() {
    return new IndexedSampleList(getSampleDB().getSampleNames());
}
/**
 * Returns the sample list including samples in read inputs.
 *
 * <p>Derived from the read-group records of the SAM/BAM header, so only
 * samples that actually appear in the input reads are included.</p>
 *
 * @return never {@code null}.
 */
public SampleList getReadSampleList() {
    return new IndexedSampleList(SampleUtils.getSAMFileSamples(getSAMFileHeader()));
}
}

View File

@ -37,6 +37,24 @@ import java.util.List;
*/
public class AlleleListUtils {
@SuppressWarnings("unchecked")
// Shared singleton returned by emptyList(); declared raw so one instance can
// serve any allele type parameter — hence the unchecked suppression above.
private static final AlleleList EMPTY_LIST = new AlleleList() {
    @Override
    public int alleleCount() {
        return 0; // an empty list never contains alleles
    }

    @Override
    public int alleleIndex(final Allele allele) {
        return -1; // no allele can ever be found in an empty list
    }

    @Override
    public Allele alleleAt(final int index) {
        // every index is out of range for an empty list
        throw new IllegalArgumentException("allele index is out of range");
    }
};
/**
* Checks whether two allele lists are in fact the same.
* @param first one list to compare.
@ -107,6 +125,16 @@ public class AlleleListUtils {
return new AsList(list);
}
/**
 * Returns an unmodifiable empty allele-list.
 *
 * <p>Always returns the same shared immutable instance, so the unchecked
 * cast to the caller's allele type is safe: the list never produces an
 * element of any type.</p>
 *
 * @param <A> the allele class.
 * @return never {@code null}.
 */
@SuppressWarnings("unchecked")
public static <A extends Allele> AlleleList<A> emptyList() {
    // 'final' dropped from the original signature: it is redundant (and
    // meaningless) on a static method, which can never be overridden.
    return (AlleleList<A>) EMPTY_LIST;
}
/**
* Simple list view of a sample-list.
*/

View File

@ -34,6 +34,33 @@ import java.util.*;
*/
public class SampleListUtils {
// Shared singleton returned by emptyList(); stateless and immutable, so a
// single instance is safe to hand out to all callers.
private static final SampleList EMPTY_LIST = new SampleList() {
    @Override
    public int sampleCount() {
        return 0; // an empty list never contains samples
    }

    @Override
    public int sampleIndex(String sample) {
        return -1; // no sample can ever be found in an empty list
    }

    @Override
    public String sampleAt(final int sampleIndex) {
        // every index is out of range for an empty list
        throw new IllegalArgumentException("index is out of valid range");
    }
};
/**
 * Returns an unmodifiable empty sample-list.
 *
 * <p>Always returns the same shared immutable instance.</p>
 *
 * @return never {@code null}
 */
public static SampleList emptyList() {
    return EMPTY_LIST;
}
/**
* Checks whether two sample lists are in fact the same.
* @param first one list to compare.

View File

@ -60,9 +60,9 @@ public class SampleUtils {
* @param header the sam file header
* @return list of strings representing the sample names
*/
public static Set<String> getSAMFileSamples(SAMFileHeader header) {
public static Set<String> getSAMFileSamples(final SAMFileHeader header) {
// get all of the unique sample names
Set<String> samples = new TreeSet<String>();
final Set<String> samples = new TreeSet<String>();
List<SAMReadGroupRecord> readGroups = header.getReadGroups();
for ( SAMReadGroupRecord readGroup : readGroups )
samples.add(readGroup.getSample());

View File

@ -85,6 +85,24 @@ public class GATKVariantContextUtils {
return true;
}
/**
 * Returns a homozygous call allele list given the only allele and the ploidy.
 *
 * @param allele the only allele in the allele list.
 * @param ploidy the ploidy of the resulting allele list.
 *
 * @throws IllegalArgumentException if {@code allele} is {@code null} or ploidy is negative.
 *
 * @return never {@code null}.
 */
public static List<Allele> homozygousAlleleList(final Allele allele, final int ploidy) {
    // Report each precondition failure separately with a meaningful message
    // (the original threw a bare IllegalArgumentException for both causes).
    if (allele == null)
        throw new IllegalArgumentException("the allele cannot be null");
    if (ploidy < 0)
        throw new IllegalArgumentException("the ploidy cannot be negative: " + ploidy);
    // Collections.nCopies returns an immutable O(1)-memory view holding the
    // single allele reference ploidy times — no per-copy allocation needed.
    return Collections.nCopies(ploidy, allele);
}
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.