diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java
index 7c69ab014..3f7794b24 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java
@@ -145,7 +145,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
*/
@Hidden
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
- public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
+ public AFCalcFactory.Calculation requestedAlleleFrequencyCalculationModel;
@Hidden
@Argument(shortName = "logExactCalls", doc="x", required=false)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/InfiniteRandomMatingPopulationModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/InfiniteRandomMatingPopulationModel.java
index 6b0594ee0..f476d940c 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/InfiniteRandomMatingPopulationModel.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/InfiniteRandomMatingPopulationModel.java
@@ -68,6 +68,28 @@ import java.util.List;
*/
public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
+ private final int cachePloidyCapacity;
+ private final int cacheAlleleCountCapacity;
+ private ThreadLocal likelihoodCalculators;
+
+ /**
+ * Create a new infinite model instance.
+ */
+ public InfiniteRandomMatingPopulationModel() {
+ this(10,50);
+ }
+
+ public InfiniteRandomMatingPopulationModel(final int calculatorCachePloidyCapacity, final int calculatorCacheAlleleCapacity) {
+ cachePloidyCapacity = calculatorCachePloidyCapacity;
+ cacheAlleleCountCapacity = calculatorCacheAlleleCapacity;
+ likelihoodCalculators = new ThreadLocal( ) {
+ @Override
+ public GenotypeLikelihoodCalculator[][] initialValue() {
+ return new GenotypeLikelihoodCalculator[calculatorCachePloidyCapacity][calculatorCacheAlleleCapacity];
+ }
+ };
+ }
+
@Override
public GenotypingLikelihoods calculateLikelihoods(final AlleleList genotypingAlleles, final GenotypingData data) {
if (genotypingAlleles == null)
@@ -99,18 +121,27 @@ public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
}
private GenotypingLikelihoods singleSampleLikelihoods(final AlleleList genotypingAlleles,
- final GenotypingData data,
- final AlleleLikelihoodMatrixMapper alleleLikelihoodMatrixMapper) {
+ final GenotypingData data,
+ final AlleleLikelihoodMatrixMapper alleleLikelihoodMatrixMapper) {
final PloidyModel ploidyModel = data.ploidyModel();
final int samplePloidy = ploidyModel.samplePloidy(0);
final int alleleCount = genotypingAlleles.alleleCount();
- final GenotypeLikelihoodCalculator likelihoodsCalculator =
- GenotypeLikelihoodCalculator.getInstance(samplePloidy,alleleCount);
+ final GenotypeLikelihoodCalculator likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy,alleleCount);
final ReadLikelihoods.Matrix sampleLikelihoods = alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(0));
final List genotypeLikelihoods = Collections.singletonList(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods));
return new GenotypingLikelihoods<>(genotypingAlleles,ploidyModel,genotypeLikelihoods);
}
+ private GenotypeLikelihoodCalculator getLikelihoodsCalculator(final int samplePloidy, final int alleleCount) {
+ if (samplePloidy >= cachePloidyCapacity)
+ return GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount);
+ else if (alleleCount >= cacheAlleleCountCapacity)
+ return GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount);
+ final GenotypeLikelihoodCalculator[][] cache = likelihoodCalculators.get();
+ final GenotypeLikelihoodCalculator result = cache[samplePloidy][alleleCount];
+ return result != null ? result : (cache[samplePloidy][alleleCount] = GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount));
+ }
+
private GenotypingLikelihoods multiSampleHeterogeneousPloidyModelLikelihoods(final AlleleList genotypingAlleles,
final GenotypingData data,
final AlleleLikelihoodMatrixMapper alleleLikelihoodMatrixMapper,
@@ -120,8 +151,7 @@ public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
final int alleleCount = genotypingAlleles.alleleCount();
for (int i = 0; i < sampleCount; i++) {
final int samplePloidy = ploidyModel.samplePloidy(i);
- final GenotypeLikelihoodCalculator likelihoodsCalculator =
- GenotypeLikelihoodCalculator.getInstance(samplePloidy,alleleCount);
+ final GenotypeLikelihoodCalculator likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy,alleleCount);
final ReadLikelihoods.Matrix sampleLikelihoods = alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(i));
genotypeLikelihoods.add(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods));
}
@@ -136,8 +166,7 @@ public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
final int samplePloidy = ploidyModel.samplePloidy(0);
final List genotypeLikelihoods = new ArrayList<>(sampleCount);
final int alleleCount = genotypingAlleles.alleleCount();
- final GenotypeLikelihoodCalculator likelihoodsCalculator =
- GenotypeLikelihoodCalculator.getInstance(samplePloidy,alleleCount);
+ final GenotypeLikelihoodCalculator likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy,alleleCount);
for (int i = 0; i < sampleCount; i++) {
final ReadLikelihoods.Matrix sampleLikelihoods = alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(i));
genotypeLikelihoods.add(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods));
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
index 0495db541..5abb1cd85 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
@@ -47,16 +47,19 @@
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.samtools.SAMUtils;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.GenotypeLikelihoods;
+import htsjdk.variant.vcf.VCFConstants;
+import org.broadinstitute.gatk.genotyping.GenotypeAlleleCounts;
+import org.broadinstitute.gatk.genotyping.GenotypeLikelihoodCalculator;
+import org.broadinstitute.gatk.genotyping.GenotypeLikelihoodCalculators;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACcounts;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACset;
import org.broadinstitute.gatk.utils.MathUtils;
-import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
-import htsjdk.variant.variantcontext.Allele;
-import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import java.util.*;
@@ -424,18 +427,9 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
*/
public static int[] getAlleleCountFromPLIndex(final int nAlleles, final int numChromosomes, final int PLindex) {
- // todo - another brain-dead inefficient implementation, can do much better by computing in closed form
- final SumIterator iterator = new SumIterator(nAlleles,numChromosomes);
- while (iterator.hasNext()) {
- final int[] plVec = iterator.getCurrentVector();
- if (iterator.getLinearIndex() == PLindex)
- return plVec;
-
- iterator.next();
- }
-
- return null;
-
+ final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(numChromosomes, nAlleles);
+ final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(PLindex);
+ return alleleCounts.alleleCountsByIndex(nAlleles - 1);
}
/*
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java
index 97a98e704..8225ba9a5 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java
@@ -48,15 +48,17 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
+import htsjdk.variant.variantcontext.*;
+import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.log4j.Logger;
-import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
+import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
@@ -69,8 +71,6 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
-import htsjdk.variant.variantcontext.*;
-import htsjdk.variant.vcf.VCFConstants;
import java.util.*;
@@ -86,22 +86,20 @@ public abstract class GenotypingEngine sampleNames;
+ protected final SampleList samples;
private final double[] log10AlleleFrequencyPriorsSNPs;
private final double[] log10AlleleFrequencyPriorsIndels;
- private final GenomeLocParser genomeLocParser;
+ protected final GenomeLocParser genomeLocParser;
// the model used for calculating p(non-ref)
protected ThreadLocal afcm = new ThreadLocal() {
@@ -111,59 +109,32 @@ public abstract class GenotypingEngine resolveSampleNamesFromToolkit(final GenomeAnalysisEngine toolkit) {
- if (toolkit == null)
- throw new IllegalArgumentException("the toolkit cannot be null");
- return new LinkedHashSet<>(toolkit.getSampleDB().getSampleNames());
- }
/**
* Construct a new genotyper engine, on a specific subset of samples.
*
- * @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
- * @param sampleNames subset of sample to work on identified by their names. If {@code null}, the full toolkit
+ * @param samples subset of samples to work on identified by their names. If {@code null}, the full toolkit
* sample set will be used instead.
+ * @param genomeLocParser the genome-loc-parser
*
- * @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
+ * @throws IllegalArgumentException if any of {@code samples}, {@code configuration} or {@code genomeLocParser} is {@code null}.
*/
- protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration,final Set sampleNames) {
- if (toolkit == null)
- throw new IllegalArgumentException("the toolkit cannot be null");
+ protected GenotypingEngine(final Config configuration,final SampleList samples, final GenomeLocParser genomeLocParser) {
+
if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null");
this.configuration = configuration;
logger = Logger.getLogger(getClass());
- this.toolkit = toolkit;
- this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames();
- numberOfGenomes = this.sampleNames.size() * configuration.genotypeArgs.samplePloidy;
+ this.samples = samples;
+ numberOfGenomes = this.samples.sampleCount() * configuration.genotypeArgs.samplePloidy;
MathUtils.Log10Cache.ensureCacheContains(numberOfGenomes * 2);
log10AlleleFrequencyPriorsSNPs = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.snpHeterozygosity,configuration.genotypeArgs.inputPrior);
log10AlleleFrequencyPriorsIndels = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.indelHeterozygosity,configuration.genotypeArgs.inputPrior);
- genomeLocParser = toolkit.getGenomeLocParser();
+ this.genomeLocParser = genomeLocParser;
}
/**
@@ -257,7 +228,7 @@ public abstract class GenotypingEngine outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), loc.getContig(), loc.getStart(), loc.getStop(), outputAlleles);
@@ -506,7 +480,9 @@ public abstract class GenotypingEngine, Unif
* Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
*/
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
- protected List annotationsToUse = new ArrayList();
+ protected List annotationsToUse = new ArrayList<>();
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
- protected List annotationsToExclude = new ArrayList();
+ protected List annotationsToExclude = new ArrayList<>();
/**
* If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups.
@@ -218,11 +221,6 @@ public class UnifiedGenotyper extends LocusWalker, Unif
// the calculation arguments
private UnifiedGenotypingEngine genotypingEngine = null;
- // the annotation engine
- private VariantAnnotatorEngine annotationEngine;
-
- private Set samples;
-
// enable deletions in the pileup
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
@@ -256,17 +254,22 @@ public class UnifiedGenotyper extends LocusWalker, Unif
*
**/
public void initialize() {
+ super.initialize();
+ final GenomeAnalysisEngine toolkit = getToolkit();
+ final Set sampleNameSet;
if ( UAC.TREAT_ALL_READS_AS_SINGLE_POOL ) {
- samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
+ sampleNameSet = Collections.singleton(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
} else {
// get all of the unique sample names
- samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
+ sampleNameSet = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
if ( UAC.referenceSampleName != null )
- samples.remove(UAC.referenceSampleName);
+ sampleNameSet.remove(UAC.referenceSampleName);
}
+ final SampleList samples = new IndexedSampleList(sampleNameSet);
+
if ( UAC.CONTAMINATION_FRACTION_FILE != null )
- UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
+ UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, sampleNameSet, logger));
// check for a bad max alleles value
if ( UAC.genotypeArgs.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)
@@ -282,8 +285,8 @@ public class UnifiedGenotyper extends LocusWalker, Unif
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
- annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
- genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
+ final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
+ genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine);
@@ -298,11 +301,11 @@ public class UnifiedGenotyper extends LocusWalker, Unif
final Set samplesForHeader;
if ( ! onlyEmitSamples.isEmpty() ) {
// make sure that onlyEmitSamples is a subset of samples
- if ( ! samples.containsAll(onlyEmitSamples) )
+ if ( ! sampleNameSet.containsAll(onlyEmitSamples) )
throw new UserException.BadArgumentValue("onlyEmitSamples", "must be a strict subset of the samples in the BAM files but is wasn't");
samplesForHeader = onlyEmitSamples;
} else {
- samplesForHeader = samples;
+ samplesForHeader = sampleNameSet;
}
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
}
@@ -310,7 +313,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif
public static Set getHeaderInfo(final UnifiedArgumentCollection UAC,
final VariantAnnotatorEngine annotationEngine,
final DbsnpArgumentCollection dbsnp) {
- Set headerInfo = new HashSet();
+ final Set headerInfo = new HashSet<>();
// all annotation fields from VariantAnnotatorEngine
if ( annotationEngine != null )
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java
index 36ac2e5bf..22bc09e98 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java
@@ -45,12 +45,16 @@
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.GenotypesContext;
+import htsjdk.variant.variantcontext.VariantContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
+import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.GenomeLocParser;
@@ -62,9 +66,6 @@ import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
-import htsjdk.variant.variantcontext.Allele;
-import htsjdk.variant.variantcontext.GenotypesContext;
-import htsjdk.variant.variantcontext.VariantContext;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
@@ -88,7 +89,6 @@ public class UnifiedGenotypingEngine extends GenotypingEngineThe new engine won't emmit annotations, will use the full sample set and will not produce additional verbose
- * output
+ * Creates a new unified genotyping engine given the UG configuration parameters and the GA engine.
+ * @param configuration the UG configuration.
+ * @param toolkit the GA engine.
*
- * @param toolkit reference to the enclosing genome analysis engine.
- * @param configuration configuration object.
- *
- * @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
+ * @throws NullPointerException if either {@code configuration} or {@code toolkit} is {@code null}.
*/
- public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration) {
- this(toolkit, configuration, null);
+ public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
+ final GenomeAnalysisEngine toolkit) {
+ this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
}
+
/**
- * Constructs a new Unified-Genotyper engine.
+ * Creates a new unified genotyping given the UG configuration parameters, the targeted set of samples and
+ * a genome location parser.
*
- * @param toolkit reference to the enclosing genome analysis engine.
- * @param configuration configuration object.
- * @param sampleNames subset of sample names to work on. If {@code null}, all it will use the {@code toolkit} full sample set.
+ * @param configuration the UG configuration.
+ * @param samples {@inheritDoc}
+ * @param baqCalculationMode the BAQ calculation mode.
*
- * @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
+ * @throws NullPointerException if any of {@code configuration}, {@code samples} or {@code genomeLocParser} is {@code null}.
+ *
+ * @throws IllegalArgumentException if {@code baqCalculationMode} is {@code null}.
*/
- public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration,
- final Set sampleNames) {
+ public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
+ final SampleList samples, final GenomeLocParser genomeLocParser,
+ final BAQ.CalculationMode baqCalculationMode) {
- super(toolkit,configuration,sampleNames);
+ super(configuration,samples,genomeLocParser);
- this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
- genomeLocParser = toolkit.getGenomeLocParser();
+ if (baqCalculationMode == null)
+ throw new IllegalArgumentException("the BAQ calculation mode cannot be null");
+
+ this.BAQEnabledOnCMDLine = baqCalculationMode != BAQ.CalculationMode.OFF;
determineGLModelsToUse();
@@ -302,7 +308,8 @@ public class UnifiedGenotypingEngine extends GenotypingEngine perReadAlleleLikelihoodMap) {
- return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
+ return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine,
+ genomeLocParser != null || refContext == null ? genomeLocParser : refContext.getGenomeLocParser(), perReadAlleleLikelihoodMap);
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java
index e4cbf0b5d..ad4ce979a 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java
@@ -48,10 +48,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
-import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
-import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.lang.reflect.Constructor;
import java.util.LinkedList;
@@ -105,6 +103,24 @@ public class AFCalcFactory {
}
public static Calculation getDefaultModel() { return EXACT_INDEPENDENT; }
+
+ /**
+ * Returns the best (fastest) model give the required ploidy and alternative allele count.
+ * @param requiredPloidy required ploidy
+ * @param requiredAlternativeAlleleCount required alternative allele count.
+ * @param preferredModel a preferred mode if any. A {@code null} indicate that we should be try to use the default instead.
+ * @return never {@code null}
+ */
+ public static Calculation getBestModel(final int requiredPloidy, final int requiredAlternativeAlleleCount, final Calculation preferredModel) {
+ final Calculation preferred = preferredModel == null ? getDefaultModel() : preferredModel;
+ if (preferred.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
+ return preferred;
+ if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
+ return EXACT_INDEPENDENT;
+ if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
+ return EXACT_REFERENCE;
+ return EXACT_GENERAL_PLOIDY;
+ }
}
private static final Map> afClasses;
@@ -137,25 +153,10 @@ public class AFCalcFactory {
public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC,
final int nSamples,
final Logger logger) {
- final int maxAltAlleles = UAC.genotypeArgs.MAX_ALTERNATE_ALLELES;
- if ( ! UAC.AFmodel.usableForParams(UAC.genotypeArgs.samplePloidy, maxAltAlleles) ) {
- logger.info("Requested ploidy " + UAC.genotypeArgs.samplePloidy + " maxAltAlleles " + maxAltAlleles + " not supported by requested model " + UAC.AFmodel + " looking for an option");
- final List supportingCalculations = new LinkedList();
- for ( final Calculation calc : Calculation.values() ) {
- if ( calc.usableForParams(UAC.genotypeArgs.samplePloidy, maxAltAlleles) )
- supportingCalculations.add(calc);
- }
+ final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES,
+ UAC.requestedAlleleFrequencyCalculationModel);
- if ( supportingCalculations.isEmpty() )
- throw new UserException("no AFCalculation model found that supports ploidy of " + UAC.genotypeArgs.samplePloidy + " and max alt alleles " + maxAltAlleles);
- else if ( supportingCalculations.size() > 1 )
- logger.debug("Warning, multiple supporting AFCalcs found " + Utils.join(",", supportingCalculations) + " choosing first arbitrarily");
- else
- UAC.AFmodel = supportingCalculations.get(0);
- logger.info("Selecting model " + UAC.AFmodel);
- }
-
- final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, maxAltAlleles, UAC.genotypeArgs.samplePloidy);
+ final AFCalc calc = createAFCalc(afCalculationModel, nSamples, UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, UAC.genotypeArgs.samplePloidy);
if ( logger != null ) calc.setLogger(logger);
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java
index 500cdf4ce..98df248aa 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java
@@ -56,7 +56,7 @@ import htsjdk.variant.variantcontext.*;
import java.util.*;
public class GeneralPloidyExactAFCalc extends ExactAFCalc {
- static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
+ static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
private final int ploidy;
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java
index ace4f22d2..0008f5b28 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java
@@ -47,6 +47,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.apache.log4j.Logger;
+import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.SeqGraph;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
@@ -113,7 +114,7 @@ public class GraphBasedLikelihoodCalculationEngine implements ReadLikelihoodCalc
}
@Override
- public ReadLikelihoods computeReadLikelihoods(final AssemblyResultSet assemblyResultSet, final List samples, final Map> perSampleReadList) {
+ public ReadLikelihoods computeReadLikelihoods(final AssemblyResultSet assemblyResultSet, final SampleList samples, final Map> perSampleReadList) {
final GraphBasedLikelihoodCalculationEngineInstance graphLikelihoodEngine =
new GraphBasedLikelihoodCalculationEngineInstance(assemblyResultSet,
hmm,log10GlobalReadMismappingRate,heterogeneousKmerSizeResolution);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java
index 7d4b3db1a..e2229cd50 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngineInstance.java
@@ -46,7 +46,11 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
+import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
+import org.broadinstitute.gatk.genotyping.AlleleList;
+import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
+import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.MultiSampleEdge;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Path;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Route;
@@ -61,7 +65,6 @@ import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pairhmm.FlexibleHMM;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
-import htsjdk.variant.variantcontext.Allele;
import java.util.*;
@@ -233,12 +236,13 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
* @return never {@code null}, and with at least one entry for input sample (keys in {@code perSampleReadList}.
* The value maps can be potentially empty though.
*/
- public ReadLikelihoods computeReadLikelihoods(final List haplotypes, final List samples,
+ public ReadLikelihoods computeReadLikelihoods(final List haplotypes, final SampleList samples,
final Map> perSampleReadList) {
// General preparation on the input haplotypes:
- final ReadLikelihoods result = new ReadLikelihoods<>(samples, haplotypes, perSampleReadList);
final List sortedHaplotypes = new ArrayList<>(haplotypes);
Collections.sort(sortedHaplotypes, Haplotype.ALPHANUMERICAL_COMPARATOR);
+ final AlleleList alleles = new IndexedAlleleList<>(sortedHaplotypes);
+ final ReadLikelihoods result = new ReadLikelihoods<>(samples, alleles, perSampleReadList);
// The actual work:
final int sampleCount = result.sampleCount();
@@ -315,7 +319,7 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
private void calculatePerReadAlleleLikelihoodMapHaplotypeProcessing(final int haplotypeIndex,
final ReadLikelihoods.Matrix likelihoods,
final Map> costsEndingByVertex) {
- final Haplotype haplotype = likelihoods.allele(haplotypeIndex);
+ final Haplotype haplotype = likelihoods.alleleAt(haplotypeIndex);
final HaplotypeRoute haplotypeRoute = haplotypeGraph.getHaplotypeRoute(haplotype);
final Set haplotypeVertices = haplotypeRoute.vertexSet();
final Map readCostByRead = new HashMap<>();
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
index 2ee5752f0..5e6a519ea 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
@@ -52,6 +52,7 @@ import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
+import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
@@ -64,15 +65,12 @@ import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.walkers.*;
+import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
-import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
-import org.broadinstitute.gatk.utils.GenomeLoc;
-import org.broadinstitute.gatk.utils.MathUtils;
-import org.broadinstitute.gatk.utils.QualityUtils;
-import org.broadinstitute.gatk.utils.SampleUtils;
+import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
@@ -606,7 +604,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
// the minimum length of a read we'd consider using for genotyping
private final static int MIN_READ_LENGTH = 10;
- private List samplesList;
+ private SampleList samplesList;
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
private final static Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
@@ -626,9 +624,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
public void initialize() {
super.initialize();
- if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY)
- throw new UserException.BadArgumentValue("-ploidy", "" + SCAC.genotypeArgs.samplePloidy + "; currently HaplotypeCaller only supports diploid sample analysis (-ploidy 2)");
-
if (dontGenotype && emitReferenceConfidence())
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
@@ -656,12 +651,9 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
logger.info("Disabling physical phasing, which is supported only for reference-model confidence output");
}
- if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
- throw new UserException.BadArgumentValue("pnrm", "HaplotypeCaller doesn't currently support " + SCAC.AFmodel);
-
-
- samplesList = new ArrayList<>(SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()));
- Set samplesSet = new LinkedHashSet<>(samplesList);
+ final GenomeAnalysisEngine toolkit = getToolkit();
+ samplesList = toolkit.getReadSampleList();
+ final Set sampleSet = SampleListUtils.asSet(samplesList);
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
@@ -672,20 +664,24 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.CONTAMINATION_FRACTION_FILE = null;
simpleUAC.exactCallsLog = null;
- activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(getToolkit(), simpleUAC, samplesSet);
+ // It seems that, at least with some test data, we can lose genuine haploid variation if we use
+ // UG's engine with ploidy == 1
+ simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy);
+ activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit);
activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null )
- SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, samplesSet, logger));
+ SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, sampleSet, logger));
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
- genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, !doNotRunPhysicalPhasing);
+ final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();
+ genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
- Set headerInfo = new HashSet<>();
+ final Set headerInfo = new HashSet<>();
headerInfo.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
// all annotation fields from VariantAnnotatorEngine
@@ -707,13 +703,20 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
}
+ if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY) {
+ if (SCAC.emitReferenceConfidence != ReferenceConfidenceMode.NONE)
+ throw new UserException.BadArgumentValue("ERC", "For now ploidies different that 2 are not allow for GVCF or BP_RESOLUTION outputs");
+ headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
+ headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
+ }
+
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotypingEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
- initializeReferenceConfidenceModel(samplesSet, headerInfo);
+ initializeReferenceConfidenceModel(samplesList, headerInfo);
- vcfWriter.writeHeader(new VCFHeader(headerInfo, samplesSet));
+ vcfWriter.writeHeader(new VCFHeader(headerInfo, sampleSet));
try {
// fasta reference reader to supplement the edges of the reference sequence
@@ -771,10 +774,11 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence());
}
- private void initializeReferenceConfidenceModel(final Set samples, final Set headerInfo) {
+ private void initializeReferenceConfidenceModel(final SampleList samples, final Set headerInfo) {
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
if ( emitReferenceConfidence() ) {
- if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
+ if ( samples.sampleCount() != 1 )
+ throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
// a kluge to enforce the use of this indexing strategy
@@ -784,7 +788,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
}
try {
- vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
+ vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands,SCAC.genotypeArgs.samplePloidy);
} catch ( IllegalArgumentException e ) {
throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
}
@@ -857,8 +861,12 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
+ final GenotypingModel genotypingModel = genotypingEngine.getGenotypingModel();
for( final Map.Entry sample : splitContexts.entrySet() ) {
- final double[] genotypeLikelihoods = referenceConfidenceModel.calcGenotypeLikelihoodsOfRefVsAny(sample.getValue().getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, averageHQSoftClips).genotypeLikelihoods;
+ final String sampleName = sample.getKey();
+ // The ploidy here is not dictated by the sample but by the simple genotyping-engine used to determine whether regions are active or not.
+ final int activeRegionDetectionHackishSamplePloidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy;
+ final double[] genotypeLikelihoods = referenceConfidenceModel.calcGenotypeLikelihoodsOfRefVsAny(sampleName,activeRegionDetectionHackishSamplePloidy,genotypingModel,sample.getValue().getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, averageHQSoftClips).genotypeLikelihoods;
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
@@ -969,8 +977,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(),
metaDataTracker,
- ( consensusMode ? Collections.emptyList() : givenAlleles ),
- emitReferenceConfidence() );
+ (consensusMode ? Collections.emptyList() : givenAlleles),
+ emitReferenceConfidence());
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) {
@@ -997,7 +1005,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
// output variant containing region.
result.addAll(referenceConfidenceModel.calculateRefConfidence(assemblyResult.getReferenceHaplotype(),
calledHaplotypes.getCalledHaplotypes(), assemblyResult.getPaddedReferenceLoc(), regionForGenotyping,
- readLikelihoods, calledHaplotypes.getCalls()));
+ readLikelihoods, genotypingEngine.getPloidyModel(), genotypingEngine.getGenotypingModel(), calledHaplotypes.getCalls()));
// output right-flanking non-variant section:
if (trimmingResult.hasRightFlankingRegion())
result.addAll(referenceModelForNoVariation(trimmingResult.nonVariantRightFlankRegion(),false));
@@ -1110,7 +1118,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
final List haplotypes = Collections.singletonList(refHaplotype);
return referenceConfidenceModel.calculateRefConfidence(refHaplotype, haplotypes,
paddedLoc, region, createDummyStratifiedReadMap(refHaplotype, samplesList, region),
- Collections.emptyList());
+ genotypingEngine.getPloidyModel(), genotypingEngine.getGenotypingModel(), Collections.emptyList());
} else
return NO_CALLS;
}
@@ -1123,11 +1131,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
* @return a map from sample -> PerReadAlleleLikelihoodMap that maps each read to ref
*/
public static ReadLikelihoods createDummyStratifiedReadMap(final Haplotype refHaplotype,
- final List samples,
+ final SampleList samples,
final ActiveRegion region) {
- return new ReadLikelihoods<>(samples, Collections.singletonList(refHaplotype),
+ return new ReadLikelihoods<>(samples, new IndexedAlleleList<>(refHaplotype),
splitReadsBySample(samples, region.getReads()));
-
}
@@ -1235,18 +1242,15 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
return splitReadsBySample(samplesList, reads);
}
- public static Map> splitReadsBySample( final List samplesList, final Collection reads ) {
+ private static Map> splitReadsBySample( final SampleList samplesList, final Collection reads ) {
final Map> returnMap = new HashMap<>();
- for( final String sample : samplesList) {
- List readList = returnMap.get( sample );
- if( readList == null ) {
- readList = new ArrayList<>();
- returnMap.put(sample, readList);
- }
- }
- for( final GATKSAMRecord read : reads ) {
+ final int sampleCount = samplesList.sampleCount();
+ for (int i = 0; i < sampleCount; i++)
+ returnMap.put(samplesList.sampleAt(i), new ArrayList());
+
+ for( final GATKSAMRecord read : reads )
returnMap.get(read.getReadGroup().getSample()).add(read);
- }
+
return returnMap;
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java
index 321c48166..97bb9efa8 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java
@@ -45,9 +45,9 @@
*/
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
+import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument;
-import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
/**
* Set of arguments for the {@link HaplotypeCaller}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java
index 77c308cfc..76c8dc772 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java
@@ -48,8 +48,9 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
-import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
+import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
+import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode;
@@ -64,7 +65,6 @@ import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.haplotype.MergeVariantsAcrossHaplotypes;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
-import htsjdk.variant.variantcontext.*;
import java.util.*;
@@ -80,28 +80,33 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine sampleNames) {
- super(toolkit,configuration,sampleNames);
- doPhysicalPhasing = true;
+ public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser) {
+ this(configuration,samples,genomeLocParser,false);
}
-
/**
* Change the merge variant across haplotypes for this engine.
*
@@ -198,9 +203,9 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine alleleList = new ArrayList<>();
- alleleList.addAll(mergedVC.getAlleles());
- alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
- vcb.alleles(alleleList);
- mergedVC = vcb.make();
- }
+ if (emitReferenceConfidence)
+ mergedVC = addNonRefSymbolicAllele(mergedVC);
final Map mergeMap = new LinkedHashMap<>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
@@ -264,7 +265,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine originalList = mergedVC.getAlleles();
+ final List alleleList = new ArrayList<>(originalList.size() + 1);
+ alleleList.addAll(mergedVC.getAlleles());
+ alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
+ vcb.alleles(alleleList);
+ return vcb.make();
+ }
+
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
// used for genotyping.
private ReadLikelihoods prepareReadAlleleLikelihoodsForAnnotation(
@@ -653,22 +664,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine readLikelihoods, final VariantContext mergedVC ) {
- final GenotypesContext genotypes = GenotypesContext.create(readLikelihoods.sampleCount());
- // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
- for (final String sample : readLikelihoods.samples() ) {
- final int numHaplotypes = mergedVC.getAlleles().size();
- final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
- final double[][] haplotypeLikelihoodMatrix = PairHMMLikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, readLikelihoods, mergedVC.getAlleles(), true);
- int glIndex = 0;
- for( int iii = 0; iii < numHaplotypes; iii++ ) {
- for( int jjj = 0; jjj <= iii; jjj++ ) {
- genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
- }
- }
- logger.debug(" Likelihoods for sample " + sample + " : " + Arrays.toString(genotypeLikelihoods));
- genotypes.add(new GenotypeBuilder(sample).alleles(NO_CALL).PL(genotypeLikelihoods).make());
- }
- return genotypes;
+ final List vcAlleles = mergedVC.getAlleles();
+ final AlleleList alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
+ final GenotypingLikelihoods likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
+ final int sampleCount = samples.sampleCount();
+ final GenotypesContext result = GenotypesContext.create(sampleCount);
+ for (int s = 0; s < sampleCount; s++)
+ result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
+ return result;
}
/**
@@ -779,4 +782,22 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final List samples, final Map> perSampleReadList ) {
+ public ReadLikelihoods computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final SampleList samples, final Map> perSampleReadList ) {
- final List haplotypes = assemblyResultSet.getHaplotypeList();
+ final List haplotypeList = assemblyResultSet.getHaplotypeList();
+ final AlleleList haplotypes = new IndexedAlleleList<>(haplotypeList);
// configure the HMM
- initializePairHMM(haplotypes, perSampleReadList);
+ initializePairHMM(haplotypeList, perSampleReadList);
// Add likelihoods for each sample's reads to our result
final ReadLikelihoods result = new ReadLikelihoods<>(samples, haplotypes, perSampleReadList);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java
index bd72a764d..aa7305bcf 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java
@@ -46,11 +46,14 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
+import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
+import org.broadinstitute.gatk.genotyping.AlleleList;
+import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
+import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
-import htsjdk.variant.variantcontext.Allele;
import java.util.HashMap;
import java.util.List;
@@ -63,17 +66,15 @@ import java.util.Random;
public class RandomLikelihoodCalculationEngine implements ReadLikelihoodCalculationEngine {
@Override
- public ReadLikelihoods computeReadLikelihoods(final AssemblyResultSet assemblyResultSet,
- final List samples,
+ public ReadLikelihoods computeReadLikelihoods(final AssemblyResultSet assemblyResultSet,
+ final SampleList samples,
final Map> reads) {
- final List haplotypes = assemblyResultSet.getHaplotypeList();
+ final AlleleList haplotypes = new IndexedAlleleList<>(assemblyResultSet.getHaplotypeList());
final ReadLikelihoods result = new ReadLikelihoods(samples, haplotypes, reads);
- final Map alleles = new HashMap<>(haplotypes.size());
- for (final Haplotype haplotype : haplotypes)
- alleles.put(haplotype,Allele.create(haplotype,false));
+ final Map alleles = new HashMap<>(haplotypes.alleleCount());
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
- final int sampleCount = samples.size();
- final int alleleCount = alleles.size();
+ final int sampleCount = samples.sampleCount();
+ final int alleleCount = haplotypes.alleleCount();
for (int i = 0; i < sampleCount; i++) {
final List sampleReads = result.sampleReads(i);
final int readCount = sampleReads.size();
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadLikelihoodCalculationEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadLikelihoodCalculationEngine.java
index 6119cba1c..ccc0c18b8 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadLikelihoodCalculationEngine.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadLikelihoodCalculationEngine.java
@@ -46,6 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
+import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
@@ -81,6 +82,7 @@ public interface ReadLikelihoodCalculationEngine {
* active region assembly process.
*
* @param assemblyResultSet the input assembly results.
+ * @param samples the list of targeted samples.
* @param perSampleReadList the input read sets stratified per sample.
*
* @throws NullPointerException if either parameter is {@code null}.
@@ -88,7 +90,7 @@ public interface ReadLikelihoodCalculationEngine {
* @return never {@code null}, and with at least one entry for input sample (keys in {@code perSampleReadList}.
* The value maps can be potentially empty though.
*/
- public ReadLikelihoods computeReadLikelihoods(AssemblyResultSet assemblyResultSet, List samples,
+ public ReadLikelihoods computeReadLikelihoods(AssemblyResultSet assemblyResultSet, SampleList samples,
Map> perSampleReadList);
public void close();
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RefVsAnyResult.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RefVsAnyResult.java
index 8726c9365..bb3b64f93 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RefVsAnyResult.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/RefVsAnyResult.java
@@ -46,6 +46,8 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
+import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
+
/**
* Holds information about a genotype call of a single sample reference vs. any non-ref event
*
@@ -58,7 +60,7 @@ final class RefVsAnyResult {
/**
* The genotype likelihoods for ref/ref ref/non-ref non-ref/non-ref
*/
- final double[] genotypeLikelihoods = new double[3];
+ final double[] genotypeLikelihoods;
/**
* AD field value for ref / non-ref
@@ -74,7 +76,31 @@ final class RefVsAnyResult {
* Cap the het and hom var likelihood values by the hom ref likelihood.
*/
protected void capByHomRefLikelihood() {
- genotypeLikelihoods[1] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[1]);
- genotypeLikelihoods[2] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[2]);
+ final int likelihoodCount = genotypeLikelihoods.length;
+ for (int i = 1; i < likelihoodCount; i++)
+ genotypeLikelihoods[i] = Math.min(genotypeLikelihoods[0],genotypeLikelihoods[i]);
}
+
+ /**
+ * Creates a new ref-vs-alt result assuming 3 as the number of genotype likelihoods (human ploidy).
+ */
+ @Deprecated
+ public RefVsAnyResult() {
+ genotypeLikelihoods =
+ new double[(HomoSapiensConstants.DEFAULT_PLOIDY * (HomoSapiensConstants.DEFAULT_PLOIDY + 1)) >> 1];
+ }
+
+ /**
+ * Creates a new ref-vs-alt result indicating the genotype likelihood vector capacity.
+ * @param likelihoodCapacity the required capacity of the likelihood array; should match the possible number of
+ * genotypes given the number of alleles (always 2) and the ploidy (arbitrary), less the genotyping
+ * model's nonsense genotype count, if applicable.
+ * @throws IllegalArgumentException if {@code likelihoodCapacity} is negative.
+ */
+ public RefVsAnyResult(final int likelihoodCapacity) {
+ if (likelihoodCapacity < 0)
+ throw new IllegalArgumentException("likelihood capacity is negative");
+ genotypeLikelihoods = new double[likelihoodCapacity];
+ }
+
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java
index dc9512eee..116b27009 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java
@@ -51,11 +51,13 @@ import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFSimpleHeaderLine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
+import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
+import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.locusiterator.LocusIteratorByState;
@@ -85,8 +87,8 @@ public class ReferenceConfidenceModel {
public final static String ALTERNATE_ALLELE_STRING = "ALT"; // arbitrary alternate allele
private final GenomeLocParser genomeLocParser;
- private final Set samples;
- private final SAMFileHeader header; // TODO -- really shouldn't depend on this
+
+ private final SampleList samples;
private final int indelInformativeDepthIndelSize;
private final static boolean WRITE_DEBUGGING_BAM = false;
@@ -103,18 +105,17 @@ public class ReferenceConfidenceModel {
* @param indelInformativeDepthIndelSize the max size of indels to consider when calculating indel informative depths
*/
public ReferenceConfidenceModel(final GenomeLocParser genomeLocParser,
- final Set samples,
+ final SampleList samples,
final SAMFileHeader header,
final int indelInformativeDepthIndelSize) {
if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");
if ( samples == null ) throw new IllegalArgumentException("samples cannot be null");
- if ( samples.isEmpty() ) throw new IllegalArgumentException("samples cannot be empty");
+ if ( samples.sampleCount() == 0) throw new IllegalArgumentException("samples cannot be empty");
if ( header == null ) throw new IllegalArgumentException("header cannot be empty");
if ( indelInformativeDepthIndelSize < 0) throw new IllegalArgumentException("indelInformativeDepthIndelSize must be >= 1 but got " + indelInformativeDepthIndelSize);
this.genomeLocParser = genomeLocParser;
this.samples = samples;
- this.header = header;
this.indelInformativeDepthIndelSize = indelInformativeDepthIndelSize;
if ( WRITE_DEBUGGING_BAM ) {
@@ -124,8 +125,6 @@ public class ReferenceConfidenceModel {
} else {
debuggingWriter = null;
}
-
- initializeIndelPLCache();
}
/**
@@ -151,7 +150,7 @@ public class ReferenceConfidenceModel {
/**
* Calculate the reference confidence for a single sample given the its read data
*
- * Returns a list of variant contexts, one for each position in the activeregion.getLoc(), each containing
+ * Returns a list of variant contexts, one for each position in the {@code activeRegion.getLoc()}, each containing
* detailed information about the certainty that the sample is hom-ref for each base in the region.
*
*
@@ -162,6 +161,8 @@ public class ReferenceConfidenceModel {
* @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
* @param activeRegion the active region we want to get the reference confidence over
* @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
+ * @param ploidyModel indicates the ploidy of each sample in {@code readLikelihoods}.
+ * @param model genotyping model.
* @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
* correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
* under any position it covers (for snps and insertions that is 1 bp, but for deletions its the entire ref span)
@@ -173,6 +174,8 @@ public class ReferenceConfidenceModel {
final GenomeLoc paddedReferenceLoc,
final ActiveRegion activeRegion,
final ReadLikelihoods readLikelihoods,
+ final PloidyModel ploidyModel,
+ final GenotypingModel model,
final List variantCalls) {
if ( refHaplotype == null ) throw new IllegalArgumentException("refHaplotype cannot be null");
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
@@ -182,12 +185,15 @@ public class ReferenceConfidenceModel {
if ( readLikelihoods == null ) throw new IllegalArgumentException("readLikelihoods cannot be null");
if ( readLikelihoods.sampleCount() != 1 ) throw new IllegalArgumentException("readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.sampleCount());
if ( refHaplotype.length() != activeRegion.getExtendedLoc().size() ) throw new IllegalArgumentException("refHaplotype " + refHaplotype.length() + " and activeRegion location size " + activeRegion.getLocation().size() + " are different");
+ if ( ploidyModel == null) throw new IllegalArgumentException("the ploidy model cannot be null");
+ if ( model == null) throw new IllegalArgumentException("the genotyping model cannot be null");
+ final int ploidy = ploidyModel.samplePloidy(0); // the first sample = the only sample in reference-confidence mode.
final GenomeLoc refSpan = activeRegion.getLocation();
final List refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
final byte[] ref = refHaplotype.getBases();
final List results = new ArrayList<>(refSpan.size());
- final String sampleName = readLikelihoods.sample(0);
+ final String sampleName = readLikelihoods.sampleAt(0);
final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedLoc().getStart();
for ( final ReadBackedPileup pileup : refPileups ) {
@@ -201,20 +207,20 @@ public class ReferenceConfidenceModel {
// otherwise emit a reference confidence variant context
final int refOffset = offset + globalRefOffset;
final byte refBase = ref[refOffset];
- final RefVsAnyResult homRefCalc = calcGenotypeLikelihoodsOfRefVsAny(pileup, refBase, (byte)6, null);
+ final RefVsAnyResult homRefCalc = calcGenotypeLikelihoodsOfRefVsAny(sampleName,ploidy,model,pileup, refBase, (byte)6, null);
homRefCalc.capByHomRefLikelihood();
final Allele refAllele = Allele.create(refBase, true);
final List refSiteAlleles = Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final VariantContextBuilder vcb = new VariantContextBuilder("HC", curPos.getContig(), curPos.getStart(), curPos.getStart(), refSiteAlleles);
- final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Arrays.asList(refAllele, refAllele));
+ final GenotypeBuilder gb = new GenotypeBuilder(sampleName, GATKVariantContextUtils.homozygousAlleleList(refAllele, ploidy));
gb.AD(homRefCalc.AD_Ref_Any);
gb.DP(homRefCalc.getDP());
// genotype likelihood calculation
final GenotypeLikelihoods snpGLs = GenotypeLikelihoods.fromLog10Likelihoods(homRefCalc.genotypeLikelihoods);
final int nIndelInformativeReads = calcNIndelInformativeReads(pileup, refOffset, ref, indelInformativeDepthIndelSize);
- final GenotypeLikelihoods indelGLs = getIndelPLs(nIndelInformativeReads);
+ final GenotypeLikelihoods indelGLs = getIndelPLs(ploidy,nIndelInformativeReads);
// now that we have the SNP and indel GLs, we take the one with the least confidence,
// as this is the most conservative estimate of our certainty that we are hom-ref.
@@ -251,23 +257,51 @@ public class ReferenceConfidenceModel {
* Get indel PLs corresponding to seeing N nIndelInformativeReads at this site
*
* @param nInformativeReads the number of reads that inform us about being ref without an indel at this site
+ * @param ploidy the requested ploidy.
* @return non-null GenotypeLikelihoods given N
*/
- protected final GenotypeLikelihoods getIndelPLs(final int nInformativeReads) {
- return indelPLCache[nInformativeReads > MAX_N_INDEL_INFORMATIVE_READS ? MAX_N_INDEL_INFORMATIVE_READS : nInformativeReads];
+ protected final GenotypeLikelihoods getIndelPLs(final int ploidy, final int nInformativeReads) {
+ if (ploidy > MAX_N_INDEL_PLOIDY)
+ throw new IllegalArgumentException("you have hit a current limitation of the GVCF output model that cannot handle ploidies larger than " + MAX_N_INDEL_PLOIDY + ", please let the GATK team know about it: " + ploidy);
+ return indelPLCache(ploidy, nInformativeReads > MAX_N_INDEL_INFORMATIVE_READS ? MAX_N_INDEL_INFORMATIVE_READS : nInformativeReads);
}
protected static final int MAX_N_INDEL_INFORMATIVE_READS = 40; // more than this is overkill because GQs are capped at 99 anyway
- private static final GenotypeLikelihoods[] indelPLCache = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];
+ private static final int MAX_N_INDEL_PLOIDY = 20;
+ private static final GenotypeLikelihoods[][] indelPLCache = new GenotypeLikelihoods[MAX_N_INDEL_PLOIDY + 1][];
private static final double INDEL_ERROR_RATE = -4.5; // 10^-4.5 indel errors per bp
- private void initializeIndelPLCache() {
- for( int nInformativeReads = 0; nInformativeReads <= MAX_N_INDEL_INFORMATIVE_READS; nInformativeReads++ ) {
- final double homRef = 0.0;
- final double het = MathUtils.LOG_ONE_HALF * nInformativeReads;
- final double homVar = INDEL_ERROR_RATE * nInformativeReads;
- indelPLCache[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(new double[]{homRef, het, homVar});
+ private final GenotypeLikelihoods indelPLCache(final int ploidy, final int nInformativeReads) {
+ GenotypeLikelihoods[] indelPLCacheByPloidy = indelPLCache[ploidy];
+ if (indelPLCacheByPloidy == null)
+ return initializeIndelPLCache(ploidy)[nInformativeReads];
+ else
+ return indelPLCacheByPloidy[nInformativeReads];
+ }
+
+ private synchronized GenotypeLikelihoods[] initializeIndelPLCache(final int ploidy) {
+ // Double-check whether another thread has done the initialization.
+ if (indelPLCache[ploidy] != null)
+ return indelPLCache[ploidy];
+
+ final double denominator = - MathUtils.Log10Cache.get(ploidy);
+ final GenotypeLikelihoods[] result = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];
+ result[0] = GenotypeLikelihoods.fromLog10Likelihoods(new double[ploidy + 1]);
+ for( int nInformativeReads = 1; nInformativeReads <= MAX_N_INDEL_INFORMATIVE_READS; nInformativeReads++ ) {
+ final byte indelQual = (byte) Math.round((INDEL_ERROR_RATE * -10));
+ final double refLikelihood = QualityUtils.qualToProbLog10(indelQual);
+ final double altLikelihood = QualityUtils.qualToErrorProbLog10(indelQual);
+ double[] PLs = new double[ploidy + 1];
+ PLs[0] = nInformativeReads * refLikelihood;
+ for (int altCount = 1; altCount <= ploidy; altCount++) {
+ final double refLikelihoodAccum = refLikelihood + MathUtils.Log10Cache.get(ploidy - altCount);
+ final double altLikelihoodAccum = altLikelihood + MathUtils.Log10Cache.get(altCount);
+ PLs[altCount] = nInformativeReads * (MathUtils.approximateLog10SumLog10(refLikelihoodAccum ,altLikelihoodAccum) + denominator);
+ }
+ result[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(PLs);
}
+ indelPLCache[ploidy] = result;
+ return result;
}
/**
@@ -279,6 +313,7 @@ public class ReferenceConfidenceModel {
* @param hqSoftClips running average data structure (can be null) to collect information about the number of high quality soft clips
* @return a RefVsAnyResult genotype call
*/
+ @Deprecated
public RefVsAnyResult calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final MathUtils.RunningAverage hqSoftClips) {
final RefVsAnyResult result = new RefVsAnyResult();
@@ -305,6 +340,73 @@ public class ReferenceConfidenceModel {
return result;
}
+ /**
+ * Calculate the genotype likelihoods for the sample in pileup for being hom-ref contrasted with being ref vs. alt
+ *
+ * @param sampleName target sample name.
+ * @param ploidy target sample ploidy.
+ * @param genotypingModel model to calculate likelihoods and genotypes.
+ * @param pileup the read backed pileup containing the data we want to evaluate
+ * @param refBase the reference base at this pileup position
+ * @param minBaseQual the min base quality for a read in the pileup at the pileup position to be included in the calculation
+ * @param hqSoftClips running average data structure (can be null) to collect information about the number of high quality soft clips
+ * @return a RefVsAnyResult genotype call.
+ */
+ public RefVsAnyResult calcGenotypeLikelihoodsOfRefVsAny(final String sampleName, final int ploidy,
+ final GenotypingModel genotypingModel,
+ final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final MathUtils.RunningAverage hqSoftClips) {
+ final AlleleList alleleList = new IndexedAlleleList<>(Allele.create(refBase,true),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
+ // Notice that the sample name is rather irrelevant as this information is never used; it just needs to be the same in both lines below.
+
+ final int maximumReadCount = pileup.getReads().size();
+
+ final List reads = new ArrayList<>(maximumReadCount);
+ final double[][] likelihoods = new double[2][maximumReadCount];
+ final int[] adCounts = new int[2];
+ int nextIndex = 0;
+ for (final PileupElement p : pileup) {
+ final byte qual = p.isDeletion() ? REF_MODEL_DELETION_QUAL : p.getQual();
+ if (!p.isDeletion() && qual <= minBaseQual)
+ continue;
+ final GATKSAMRecord read = p.getRead();
+ reads.add(read);
+ final boolean isAlt = p.getBase() != refBase || p.isDeletion() || p.isBeforeDeletionStart()
+ || p.isAfterDeletionEnd() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip();
+ final int bestAllele;
+ final int worstAllele;
+ if (isAlt) {
+ bestAllele = 1;
+ worstAllele = 0;
+ } else {
+ bestAllele = 0;
+ worstAllele = 1;
+ }
+
+ likelihoods[bestAllele][nextIndex] = QualityUtils.qualToProbLog10(qual);
+ likelihoods[worstAllele][nextIndex++] = QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD;
+ adCounts[bestAllele]++;
+ if (isAlt && hqSoftClips != null && p.isNextToSoftClip())
+ hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) 28));
+ }
+
+ final Map> sampleToReads = Collections.singletonMap(sampleName,reads);
+ final ReadLikelihoods readLikelihoods = new ReadLikelihoods<>(new IndexedSampleList(sampleName),alleleList,sampleToReads);
+ final ReadLikelihoods.Matrix sampleLikelihoods = readLikelihoods.sampleMatrix(0);
+ final int readCount = sampleLikelihoods.readCount();
+ for (int i = 0; i < readCount; i++) {
+ sampleLikelihoods.set(0,i,likelihoods[0][i]);
+ sampleLikelihoods.set(1,i,likelihoods[1][i]);
+ }
+
+ final PloidyModel ploidyModel = new HomogeneousPloidyModel(new IndexedSampleList(sampleName),ploidy);
+ final GenotypingLikelihoods genotypingLikelihoods = genotypingModel.calculateLikelihoods(alleleList, new GenotypingData<>(ploidyModel, readLikelihoods));
+ final double[] genotypeLikelihoodArray = genotypingLikelihoods.sampleLikelihoods(0).getAsVector();
+ final RefVsAnyResult result = new RefVsAnyResult(genotypeLikelihoodArray.length);
+ System.arraycopy(genotypeLikelihoodArray,0,result.genotypeLikelihoods,0,genotypeLikelihoodArray.length);
+ System.arraycopy(adCounts,0,result.AD_Ref_Any,0,2);
+ return result;
+ }
+
/**
* Get a list of pileups that span the entire active region span, in order, one for each position
*/
@@ -330,7 +432,7 @@ public class ReferenceConfidenceModel {
debuggingWriter.addAlignment(read);
final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
- true, genomeLocParser, samples, false);
+ true, genomeLocParser, SampleListUtils.asSet(samples), false);
final List pileups = new LinkedList<>();
final int startPos = activeRegionSpan.getStart();
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java
index 6f71868bb..a798c760f 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/PairHMMIndelErrorModel.java
@@ -49,6 +49,9 @@ package org.broadinstitute.gatk.tools.walkers.indels;
import com.google.java.contract.Ensures;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
+import org.broadinstitute.gatk.genotyping.AlleleList;
+import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
+import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.clipping.ReadClipper;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@@ -444,10 +447,10 @@ public class PairHMMIndelErrorModel {
// Apparently more than one allele can map to the same haplotype after trimming
final Set distinctHaplotypesSet = new LinkedHashSet<>(trimmedHaplotypeMap.values());
- final List distinctHaplotypesList = Arrays.asList(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
+ final AlleleList distinctHaplotypesList = new IndexedAlleleList<>(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
// Get the likelihoods for our clipped read against each of our trimmed haplotypes.
final ReadLikelihoods rl = new ReadLikelihoods<>(
- Collections.singletonList("DUMMY_SAMPLE"),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
+ new IndexedSampleList(Collections.singletonList("DUMMY_SAMPLE")),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
final ReadLikelihoods.Matrix dummySampleLikelihoods = rl.sampleMatrix(0);
pairHMM.computeLikelihoods(rl.sampleMatrix(0), Collections.singletonList(processedRead), readGCPArrayMap);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java
index 432bfc770..f78519f3f 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java
@@ -46,6 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.validation;
+import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@@ -349,13 +350,14 @@ public class GenotypeAndValidate extends RodWalker= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
if (callConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
+ final GenomeAnalysisEngine toolkit = getToolkit();
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
- snpEngine = new UnifiedGenotypingEngine(getToolkit(), uac);
+ snpEngine = new UnifiedGenotypingEngine(uac,toolkit);
// Adding the INDEL calling arguments for UG
UnifiedArgumentCollection uac_indel = uac.clone();
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
- indelEngine = new UnifiedGenotypingEngine(getToolkit(), uac_indel);
+ indelEngine = new UnifiedGenotypingEngine(uac_indel,toolkit);
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
index ebcb82444..9d18e9981 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
@@ -46,8 +46,12 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
+import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
+import org.broadinstitute.gatk.genotyping.IndexedSampleList;
+import org.broadinstitute.gatk.genotyping.SampleList;
+import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
@@ -111,6 +115,7 @@ import java.util.*;
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-10,stop=10))
+@SuppressWarnings("unused")
public class GenotypeGVCFs extends RodWalker implements AnnotatorCompatible, TreeReducible {
/**
@@ -159,12 +164,13 @@ public class GenotypeGVCFs extends RodWalker variantCollection : variantCollections )
variants.addAll(variantCollection.getRodBindings());
- final Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants);
- final Set samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
+ final GenomeAnalysisEngine toolkit = getToolkit();
+ final Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
+ final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
// create the genotyping engine
- genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), createUAC(), samples);
+ genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
// create the annotation engine
- annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.emptyList(), this, getToolkit());
+ annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.emptyList(), this, toolkit);
// take care of the VCF headers
final Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
@@ -179,7 +185,8 @@ public class GenotypeGVCFs extends RodWalker sampleNameSet = SampleListUtils.asSet(samples);
+ final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet);
vcfWriter.writeHeader(vcfHeader);
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java
index 50e8bd416..e6cc614e6 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java
@@ -46,6 +46,10 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
+import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
+import org.broadinstitute.gatk.genotyping.IndexedSampleList;
+import org.broadinstitute.gatk.genotyping.SampleList;
+import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@@ -120,14 +124,18 @@ public class RegenotypeVariants extends RodWalker implements T
UAC.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
String trackName = variantCollection.variants.getName();
- Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
- UG_engine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
+
+ final GenomeAnalysisEngine toolkit = getToolkit();
+ final SampleList samples =
+ new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(toolkit, Arrays.asList(trackName)));
+ final Set sampleNameSet = SampleListUtils.asSet(samples);
+ UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
final Set hInfo = new HashSet();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));
hInfo.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
- vcfWriter.writeHeader(new VCFHeader(hInfo, samples));
+ vcfWriter.writeHeader(new VCFHeader(hInfo, sampleNameSet));
}
/**
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriter.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriter.java
index e55bf4fa0..5659e76a0 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriter.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriter.java
@@ -46,15 +46,14 @@
package org.broadinstitute.gatk.utils.gvcf;
-import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
+import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
-import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@@ -70,9 +69,7 @@ public class GVCFWriter implements VariantContextWriter {
//
// static VCF field names
//
- protected final static String BLOCK_SIZE_INFO_FIELD = "BLOCK_SIZE";
protected final static String MIN_DP_FORMAT_FIELD = "MIN_DP";
- protected final static String MIN_GQ_FORMAT_FIELD = "MIN_GQ";
//
// Final fields initialized in constructor
@@ -87,6 +84,7 @@ public class GVCFWriter implements VariantContextWriter {
String contigOfNextAvailableStart = null;
private String sampleName = null;
private HomRefBlock currentBlock = null;
+ private final int defaultPloidy;
/**
* Is the proposed GQ partitions well-formed?
@@ -94,7 +92,7 @@ public class GVCFWriter implements VariantContextWriter {
* @param GQPartitions proposed GQ partitions
* @return a non-null string if something is wrong (string explains issue)
*/
- protected static List parsePartitions(final List GQPartitions) {
+ protected static List parsePartitions(final List GQPartitions, final int defaultPloidy) {
if ( GQPartitions == null ) throw new IllegalArgumentException("GQpartitions cannot be null");
if ( GQPartitions.isEmpty() ) throw new IllegalArgumentException("GQpartitions cannot be empty");
@@ -104,10 +102,10 @@ public class GVCFWriter implements VariantContextWriter {
if ( value == null ) throw new IllegalArgumentException("GQPartitions contains a null integer");
if ( value < lastThreshold ) throw new IllegalArgumentException("GQPartitions is out of order. Last is " + lastThreshold + " but next is " + value);
if ( value == lastThreshold ) throw new IllegalArgumentException("GQPartitions is equal elements: Last is " + lastThreshold + " but next is " + value);
- result.add(new HomRefBlock(lastThreshold, value));
+ result.add(new HomRefBlock(lastThreshold, value,defaultPloidy));
lastThreshold = value;
}
- result.add(new HomRefBlock(lastThreshold, Integer.MAX_VALUE));
+ result.add(new HomRefBlock(lastThreshold, Integer.MAX_VALUE,defaultPloidy));
return result;
}
@@ -128,11 +126,13 @@ public class GVCFWriter implements VariantContextWriter {
*
* @param underlyingWriter the ultimate destination of the GVCF records
* @param GQPartitions a well-formed list of GQ partitions
+ * @param defaultPloidy the assumed ploidy for input variant context without one.
*/
- public GVCFWriter(final VariantContextWriter underlyingWriter, final List GQPartitions) {
+ public GVCFWriter(final VariantContextWriter underlyingWriter, final List GQPartitions, final int defaultPloidy) {
if ( underlyingWriter == null ) throw new IllegalArgumentException("underlyingWriter cannot be null");
this.underlyingWriter = underlyingWriter;
- this.GQPartitions = parsePartitions(GQPartitions);
+ this.GQPartitions = parsePartitions(GQPartitions,defaultPloidy);
+ this.defaultPloidy = defaultPloidy;
}
/**
@@ -148,10 +148,6 @@ public class GVCFWriter implements VariantContextWriter {
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
- // These annotations are no longer standard
- //header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block"));
- //header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block"));
-
for ( final HomRefBlock partition : GQPartitions ) {
header.addMetaDataLine(partition.toVCFHeaderLine());
}
@@ -188,27 +184,30 @@ public class GVCFWriter implements VariantContextWriter {
* @return a VariantContext to be emitted, or null if non is appropriate
*/
protected VariantContext addHomRefSite(final VariantContext vc, final Genotype g) {
+
if ( nextAvailableStart != -1 ) {
// don't create blocks while the hom-ref site falls before nextAvailableStart (for deletions)
- if ( vc.getStart() <= nextAvailableStart && vc.getChr().equals(contigOfNextAvailableStart) ) {
+ if ( vc.getStart() <= nextAvailableStart && vc.getChr().equals(contigOfNextAvailableStart) )
return null;
- }
// otherwise, reset to non-relevant
nextAvailableStart = -1;
contigOfNextAvailableStart = null;
}
- if ( currentBlock == null ) {
- currentBlock = createNewBlock(vc, g);
- return null;
- } else if ( currentBlock.withinBounds(g.getGQ()) ) {
+ final VariantContext result;
+ if (genotypeCanBeMergedInCurrentBlock(g)) {
currentBlock.add(vc.getStart(), g);
- return null;
+ result = null;
} else {
- final VariantContext result = blockToVCF(currentBlock);
+ result = blockToVCF(currentBlock);
currentBlock = createNewBlock(vc, g);
- return result;
}
+ return result;
+ }
+
+ private boolean genotypeCanBeMergedInCurrentBlock(final Genotype g) {
+ return currentBlock != null && currentBlock.withinBounds(g.getGQ()) && currentBlock.getPloidy() == g.getPloidy()
+ && (currentBlock.getMinPLs() == null || !g.hasPL() || (currentBlock.getMinPLs().length == g.getPL().length));
}
/**
@@ -226,21 +225,20 @@ public class GVCFWriter implements VariantContextWriter {
* Convert a HomRefBlock into a VariantContext
*
* @param block the block to convert
- * @return a VariantContext representing the gVCF encoding for this block
+ * @return a VariantContext representing the gVCF encoding for this block.
+ * It will return {@code null} if input {@code block} is {@code null}, indicating that there
+ * is no variant-context to be output into the VCF.
*/
private VariantContext blockToVCF(final HomRefBlock block) {
- if ( block == null ) throw new IllegalArgumentException("block cannot be null");
+ if ( block == null ) return null;
final VariantContextBuilder vcb = new VariantContextBuilder(block.getStartingVC());
vcb.attributes(new HashMap(2)); // clear the attributes
vcb.stop(block.getStop());
vcb.attribute(VCFConstants.END_KEY, block.getStop());
- // This annotation is no longer standard
- //vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize());
-
// create the single Genotype with GQ and DP annotations
- final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef()));
+ final GenotypeBuilder gb = new GenotypeBuilder(sampleName, GATKVariantContextUtils.homozygousAlleleList(block.getRef(),block.getPloidy()));
gb.noAD().noPL().noAttributes(); // clear all attributes
gb.GQ(block.getMedianGQ());
gb.DP(block.getMedianDP());
@@ -269,10 +267,12 @@ public class GVCFWriter implements VariantContextWriter {
break;
}
}
- if ( partition == null ) throw new IllegalStateException("GQ " + g + " from " + vc + " didn't fit into any partition " + partition);
+
+ if ( partition == null )
+ throw new IllegalStateException("GQ " + g + " from " + vc + " didn't fit into any partition");
// create the block, add g to it, and return it for use
- final HomRefBlock block = new HomRefBlock(vc, partition.getGQLowerBound(), partition.getGQUpperBound());
+ final HomRefBlock block = new HomRefBlock(vc, partition.getGQLowerBound(), partition.getGQUpperBound(), defaultPloidy);
block.add(vc.getStart(), g);
return block;
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlock.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlock.java
index 179217b91..200eb1366 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlock.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlock.java
@@ -46,11 +46,11 @@
package org.broadinstitute.gatk.utils.gvcf;
-import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeaderLine;
+import org.broadinstitute.gatk.utils.MathUtils;
import java.util.ArrayList;
import java.util.List;
@@ -75,6 +75,7 @@ final class HomRefBlock {
final private List GQs = new ArrayList<>(100);
final private List DPs = new ArrayList<>(100);
private final Allele ref;
+ private final int ploidy;
/**
* Create a new HomRefBlock
@@ -83,7 +84,7 @@ final class HomRefBlock {
* @param minGQ the minGQ (inclusive) to use in this band
* @param maxGQ the maxGQ (exclusive) to use in this band
*/
- public HomRefBlock(final VariantContext startingVC, int minGQ, int maxGQ) {
+ public HomRefBlock(final VariantContext startingVC, final int minGQ, final int maxGQ, final int defaultPloidy) {
if ( startingVC == null ) throw new IllegalArgumentException("startingVC cannot be null");
if ( minGQ > maxGQ ) throw new IllegalArgumentException("bad minGQ " + minGQ + " as its > maxGQ " + maxGQ);
@@ -92,6 +93,7 @@ final class HomRefBlock {
this.ref = startingVC.getReference();
this.minGQ = minGQ;
this.maxGQ = maxGQ;
+ this.ploidy = startingVC.getMaxPloidy(defaultPloidy);
}
/**
@@ -100,7 +102,7 @@ final class HomRefBlock {
* @param minGQ the minGQ (inclusive) to use in this band
* @param maxGQ the maxGQ (exclusive) to use in this band
*/
- public HomRefBlock(int minGQ, int maxGQ) {
+ public HomRefBlock(final int minGQ, final int maxGQ, final int ploidy) {
if ( minGQ > maxGQ ) throw new IllegalArgumentException("bad minGQ " + minGQ + " as its > maxGQ " + maxGQ);
this.startingVC = null;
@@ -108,6 +110,7 @@ final class HomRefBlock {
this.ref = null;
this.minGQ = minGQ;
this.maxGQ = maxGQ;
+ this.ploidy = ploidy;
}
/**
@@ -119,19 +122,18 @@ final class HomRefBlock {
if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field");
if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field");
if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop);
+ if ( g.getPloidy() != ploidy)
+ throw new IllegalArgumentException("cannot add a genotype with a different ploidy: " + g.getPloidy() + " != " + ploidy);
- if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs
+ if( minPLs == null )
+ minPLs = g.getPL();
+ else { // otherwise take the min with the provided genotype's PLs
final int[] PL = g.getPL();
- if( PL.length == 3 ) {
- minPLs = PL.clone();
- }
- } else { // otherwise take the min with the provided genotype's PLs
- final int[] PL = g.getPL();
- if( PL.length == 3 ) {
- minPLs[0] = Math.min(minPLs[0], PL[0]);
- minPLs[1] = Math.min(minPLs[1], PL[1]);
- minPLs[2] = Math.min(minPLs[2], PL[2]);
- }
+ if (PL.length != minPLs.length)
+ throw new IllegalStateException("trying to merge different PL array sizes: " + PL.length + " != " + minPLs.length);
+ for (int i = 0; i < PL.length; i++)
+ if (minPLs[i] > PL[i])
+ minPLs[i] = PL[i];
}
stop = pos;
GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission
@@ -182,4 +184,12 @@ final class HomRefBlock {
public VCFHeaderLine toVCFHeaderLine() {
return new VCFHeaderLine("GVCFBlock", "minGQ=" + getGQLowerBound() + "(inclusive),maxGQ=" + getGQUpperBound() + "(exclusive)");
}
+
+ /**
+ * Get the ploidy of this hom-ref block.
+ * @return the ploidy of this hom-ref block; fixed at construction time.
+ */
+ public int getPloidy() {
+ return ploidy;
+ }
}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotype/HaplotypeLDCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotype/HaplotypeLDCalculator.java
index 2c17d4d5c..af93892a7 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotype/HaplotypeLDCalculator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/haplotype/HaplotypeLDCalculator.java
@@ -48,6 +48,9 @@ package org.broadinstitute.gatk.utils.haplotype;
import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.VariantContext;
+import org.broadinstitute.gatk.genotyping.AlleleList;
+import org.broadinstitute.gatk.genotyping.AlleleListUtils;
+import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.PairHMMLikelihoodCalculationEngine;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
@@ -77,7 +80,9 @@ public class HaplotypeLDCalculator {
@SuppressWarnings("unchecked")
protected HaplotypeLDCalculator() {
haplotypes = Collections.emptyList();
- readLikelihoods = new ReadLikelihoods<>((List)Collections.EMPTY_LIST, (List)Collections.EMPTY_LIST, Collections.EMPTY_MAP);
+ final AlleleList alleleList = AlleleListUtils.emptyList();
+ readLikelihoods = new ReadLikelihoods<>(SampleListUtils.emptyList(),
+ alleleList, Collections.EMPTY_MAP);
}
public HaplotypeLDCalculator(final List haplotypes, final ReadLikelihoods haplotypeReadMap) {
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java
index d17c12b30..51f1e04a2 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java
@@ -50,15 +50,17 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
// the imports for unit testing.
-import org.broadinstitute.gatk.utils.BaseTest;
-import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
-import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
-import org.broadinstitute.gatk.utils.MathUtils;
-import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
+import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
+import org.broadinstitute.gatk.genotyping.SampleList;
+import org.broadinstitute.gatk.genotyping.SampleListUtils;
+import org.broadinstitute.gatk.utils.BaseTest;
+import org.broadinstitute.gatk.utils.MathUtils;
+import org.broadinstitute.gatk.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
@@ -76,7 +78,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
engine.setArguments(new GATKArgumentCollection());
final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
- final Set fakeSamples = Collections.singleton("fake");
+ final SampleList fakeSamples = SampleListUtils.singletonList("fake");
ugEngine = new UnifiedGenotypingEngine(engine, args,fakeSamples);
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java
index 66af674e9..d36b1af16 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HCLikelihoodCalculationEnginesBenchmark.java
@@ -47,6 +47,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
+import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.utils.pairhmm.ActiveRegionTestDataSet;
import org.broadinstitute.gatk.utils.pairhmm.FastLoglessPairHMM;
import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
@@ -112,7 +113,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
public void timeGraphBasedLikelihoods(final int reps) {
for (int i = 0; i < reps; i++) {
final GraphBasedLikelihoodCalculationEngineInstance rtlce = new GraphBasedLikelihoodCalculationEngineInstance(dataSet.assemblyResultSet(), new FastLoglessPairHMM((byte)10),Double.NEGATIVE_INFINITY,HeterogeneousKmerSizeResolution.COMBO_MAX);
- rtlce.computeReadLikelihoods(dataSet.haplotypeList(), Collections.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
+ rtlce.computeReadLikelihoods(dataSet.haplotypeList(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}
@@ -121,7 +122,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
for (int i = 0; i < reps; i++) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
- engine.computeReadLikelihoods(dataSet.assemblyResultSet(), Collections.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
+ engine.computeReadLikelihoods(dataSet.assemblyResultSet(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadThreadingLikelihoodCalculationEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadThreadingLikelihoodCalculationEngineUnitTest.java
index db6923feb..cb946aafe 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadThreadingLikelihoodCalculationEngineUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReadThreadingLikelihoodCalculationEngineUnitTest.java
@@ -47,6 +47,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.variant.variantcontext.Allele;
+import org.broadinstitute.gatk.genotyping.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.HaplotypeGraph;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
@@ -262,7 +263,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
dataSet = (ActiveRegionTestDataSet) params[0];
if (INTRODUCE_READ_ERRORS) dataSet.introduceErrors(new Random(13));
graphEngine = new GraphBasedLikelihoodCalculationEngineInstance(dataSet.assemblyResultSet(),hmm,Double.NEGATIVE_INFINITY, HeterogeneousKmerSizeResolution.COMBO_MAX);
- graphLks = graphEngine.computeReadLikelihoods(dataSet.haplotypeList(),Collections.singletonList("anonymous"),Collections.singletonMap("anonymous",dataSet.readList())).toPerReadAlleleLikelihoodMap(0);
+ graphLks = graphEngine.computeReadLikelihoods(dataSet.haplotypeList(), SampleListUtils.singletonList("anonymous"),Collections.singletonMap("anonymous",dataSet.readList())).toPerReadAlleleLikelihoodMap(0);
// clip reads at the anchors.
final Map clippedReads = anchorClippedReads(graphEngine.getHaplotypeGraph(),dataSet.readList());
@@ -272,7 +273,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
clippedReadList.add(clippedReads.containsKey(r) ? clippedReads.get(r) : r);
}
- loglessLks = fullPairHMM.computeReadLikelihoods(dataSet.assemblyResultSet(),Collections.singletonList("anonymous"),Collections.singletonMap("anonymous",clippedReadList)).toPerReadAlleleLikelihoodMap(0);
+ loglessLks = fullPairHMM.computeReadLikelihoods(dataSet.assemblyResultSet(),SampleListUtils.singletonList("anonymous"),Collections.singletonMap("anonymous",clippedReadList)).toPerReadAlleleLikelihoodMap(0);
// Change clipped by unclipped in the resulting likelihood map.
for (final GATKSAMRecord r : clippedReads.keySet()) {
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java
index 08edeee98..7c2cd8727 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java
@@ -47,11 +47,12 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.samtools.SAMFileHeader;
-import org.broadinstitute.gatk.utils.BaseTest;
-import org.broadinstitute.gatk.utils.GenomeLoc;
-import org.broadinstitute.gatk.utils.GenomeLocParser;
-import org.broadinstitute.gatk.utils.UnvalidatingGenomeLoc;
-import org.broadinstitute.gatk.utils.Utils;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.GenotypeLikelihoods;
+import htsjdk.variant.variantcontext.GenotypeType;
+import htsjdk.variant.variantcontext.VariantContext;
+import org.broadinstitute.gatk.genotyping.*;
+import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@@ -61,7 +62,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
-import htsjdk.variant.variantcontext.*;
+import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
@@ -75,7 +76,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final String RGID = "ID1";
GATKSAMReadGroupRecord rg;
final String sample = "NA12878";
- final Set samples = Collections.singleton(sample);
+ final SampleList samples = SampleListUtils.singletonList(sample);
SAMFileHeader header;
ReferenceConfidenceModel model;
@@ -179,12 +180,12 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
@Test
public void testIndelLikelihoods() {
- GenotypeLikelihoods prev = model.getIndelPLs(0);
+ GenotypeLikelihoods prev = model.getIndelPLs(HomoSapiensConstants.DEFAULT_PLOIDY,0);
Assert.assertEquals(prev.getAsPLs(), new int[]{0, 0, 0});
Assert.assertEquals(-10 * prev.getLog10GQ(GenotypeType.HOM_REF), 0.0);
for ( int i = 1; i <= ReferenceConfidenceModel.MAX_N_INDEL_INFORMATIVE_READS; i++ ) {
- final GenotypeLikelihoods current = model.getIndelPLs(i);
+ final GenotypeLikelihoods current = model.getIndelPLs(HomoSapiensConstants.DEFAULT_PLOIDY,i);
final double prevGQ = -10 * prev.getLog10GQ(GenotypeType.HOM_REF);
final double currGQ = -10 * current.getLog10GQ(GenotypeType.HOM_REF);
Assert.assertTrue(prevGQ < currGQ, "GQ Failed with prev " + prev + " curr " + current + " at " + i);
@@ -288,10 +289,12 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
data.getActiveRegion().add(data.makeRead(0, data.getRefLength()));
}
- final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
+ final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
+ final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
+ final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final List expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads);
- final List contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
+ final List contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
@@ -304,12 +307,14 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final List haplotypes = Arrays.asList(data.getRefHap());
final List calls = Collections.emptyList();
+ final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
+ final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
data.getActiveRegion().add(data.makeRead(start, readLen));
- final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
+ final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final List expectedDPs = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getLocation().size(), 0));
for ( int i = start; i < readLen + start; i++ ) expectedDPs.set(i, 1);
- final List contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
+ final List contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
}
@@ -340,10 +345,12 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
data.getActiveRegion().add(data.makeRead(0, data.getRefLength()));
}
- final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
+ final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
+ final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,HomoSapiensConstants.DEFAULT_PLOIDY);
+ final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final List expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads);
- final List contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
+ final List contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java
index 61a6860fd..33e1a4758 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java
@@ -48,6 +48,8 @@ package org.broadinstitute.gatk.utils.genotyper;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
+import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
+import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
@@ -72,7 +74,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testInstantiationAndQuery(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods result = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods result = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
Assert.assertEquals(result.sampleCount(), samples.length);
Assert.assertEquals(result.alleleCount(), alleles.length);
@@ -85,7 +87,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testLikelihoodFillingAndQuery(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods result = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods result = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] likelihoods = fillWithRandomLikelihoods(samples, alleles, result);
testLikelihoodMatrixQueries(samples, result, likelihoods);
}
@@ -106,7 +108,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testBestAlleles(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples,alleles,original);
final int alleleCount = alleles.length;
for (int s = 0; s < samples.length; s++) {
@@ -146,7 +148,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testBestAlleleMap(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples,alleles,original);
final Map> expected = new HashMap<>(alleles.length);
for (final Allele allele : alleles)
@@ -171,7 +173,7 @@ public class ReadLikelihoodsUnitTest
}
}
if ((bestAlleleLk - secondBestAlleleLk) > ReadLikelihoods.BestAllele.INFORMATIVE_THRESHOLD)
- expected.get(alleles[bestAlleleIndex]).add(sampleMatrix.read(r));
+ expected.get(alleles[bestAlleleIndex]).add(sampleMatrix.readAt(r));
}
}
@@ -189,7 +191,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testFilterPoorlyModeledReads(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
for (int s = 0; s < samples.length; s++) {
final int sampleReadCount = original.sampleReadCount(s);
@@ -220,7 +222,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testFilterReadsToOverlap(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final GenomeLoc evenReadOverlap = locParser.createGenomeLoc(SAM_HEADER.getSequenceDictionary().getSequences().get(0).getSequenceName(),EVEN_READ_START ,EVEN_READ_START );
fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods result = original.clone();
@@ -231,7 +233,7 @@ public class ReadLikelihoodsUnitTest
newLikelihoods[s][a] = new double[(original.sampleReadCount(s) + 1) / 2];
final ReadLikelihoods.Matrix sampleMatrix = original.sampleMatrix(s);
for (int r = 0; r < newLikelihoods[s][a].length; r++) {
- Assert.assertEquals(result.readIndex(s,sampleMatrix.read(r << 1)),r);
+ Assert.assertEquals(result.readIndex(s,sampleMatrix.readAt(r << 1)),r);
newLikelihoods[s][a][r] = sampleMatrix.get(a, r << 1);
}
}
@@ -240,14 +242,14 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "marginalizationDataSets")
public void testMarginalizationWithOverlap(final String[] samples, final Allele[] alleles, final Map> reads, final Map> newToOldAlleleMapping) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final GenomeLoc evenReadOverlap = locParser.createGenomeLoc(SAM_HEADER.getSequenceDictionary().getSequences().get(0).getSequenceName(),EVEN_READ_START ,EVEN_READ_START );
fillWithRandomLikelihoods(samples, alleles, original);
final ReadLikelihoods marginalized = original.marginalize(newToOldAlleleMapping,evenReadOverlap);
Assert.assertNotNull(marginalized);
Assert.assertEquals(newToOldAlleleMapping.size(),marginalized.alleleCount());
for (int a = 0; a < marginalized.alleleCount(); a++) {
- final List oldAlleles = newToOldAlleleMapping.get(marginalized.allele(a));
+ final List oldAlleles = newToOldAlleleMapping.get(marginalized.alleleAt(a));
Assert.assertNotNull(oldAlleles);
for (int s = 0; s < samples.length; s++) {
final ReadLikelihoods.Matrix oldSmapleLikelihoods = original.sampleMatrix(s);
@@ -268,13 +270,13 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "marginalizationDataSets")
public void testMarginalization(final String[] samples, final Allele[] alleles, final Map> reads, final Map> newToOldAlleleMapping) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples, alleles, original);
final ReadLikelihoods marginalized = original.marginalize(newToOldAlleleMapping);
Assert.assertNotNull(marginalized);
Assert.assertEquals(newToOldAlleleMapping.size(),marginalized.alleleCount());
for (int a = 0; a < marginalized.alleleCount(); a++) {
- final List oldAlleles = newToOldAlleleMapping.get(marginalized.allele(a));
+ final List oldAlleles = newToOldAlleleMapping.get(marginalized.alleleAt(a));
Assert.assertNotNull(oldAlleles);
for (int s = 0; s < samples.length; s++) {
final ReadLikelihoods.Matrix oldSmapleLikelihoods = original.sampleMatrix(s);
@@ -295,7 +297,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeBestToZero(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods result= original.clone();
result.normalizeLikelihoods(true, Double.NEGATIVE_INFINITY);
@@ -321,7 +323,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeCapWorstLK(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods result= original.clone();
result.normalizeLikelihoods(false, - 0.001);
@@ -354,7 +356,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeCapWorstLKAndBestToZero(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods result= original.clone();
result.normalizeLikelihoods(true, - 0.001);
@@ -390,7 +392,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testAddMissingAlleles(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods result = original.clone();
@@ -411,8 +413,8 @@ public class ReadLikelihoodsUnitTest
Assert.assertEquals(original.alleleCount() + 1, result.alleleCount());
// We add too more amongst exisisting alleles:
- result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.allele(1),
- result.allele(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
+ result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.alleleAt(1),
+ result.alleleAt(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
Assert.assertEquals(original.alleleCount()+3,result.alleleCount());
@@ -439,7 +441,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testAddNonRefAllele(final String[] samples, final Allele[] alleles, final Map> reads) {
- final ReadLikelihoods original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
+ final ReadLikelihoods original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods result = original.clone();
result.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
@@ -473,13 +475,13 @@ public class ReadLikelihoodsUnitTest
private void testLikelihoodMatrixQueries(String[] samples, ReadLikelihoods result, final double[][][] likelihoods) {
for (final String sample : samples) {
final int sampleIndex = result.sampleIndex(sample);
- final double[][] likelihoodMatrix = result.sampleValues(sampleIndex);
final int sampleReadCount = result.sampleReadCount(sampleIndex);
- Assert.assertEquals(result.alleleCount(), likelihoodMatrix.length);
- for (int a = 0; a < likelihoodMatrix.length; a++) {
- Assert.assertEquals(likelihoodMatrix[a].length,sampleReadCount);
+ final int alleleCount = result.alleleCount();
+ Assert.assertEquals(result.alleleCount(), alleleCount);
+ for (int a = 0; a < alleleCount; a++) {
+ Assert.assertEquals(result.sampleReadCount(0),sampleReadCount);
for (int r = 0; r < sampleReadCount; r++)
- Assert.assertEquals(likelihoodMatrix[a][r],
+ Assert.assertEquals(result.sampleMatrix(0).get(a,r),
likelihoods == null ? 0.0 : likelihoods[sampleIndex][a][r], EPSILON);
}
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java
index 3228e87d6..1f0280c82 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java
@@ -46,14 +46,14 @@
package org.broadinstitute.gatk.utils.gvcf;
-import org.broadinstitute.gatk.utils.BaseTest;
-import org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceModel;
-import org.broadinstitute.gatk.utils.Utils;
-import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
+import org.broadinstitute.gatk.utils.BaseTest;
+import org.broadinstitute.gatk.utils.Utils;
+import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
+import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@@ -100,21 +100,21 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testHeaderWriting() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.writeHeader(new VCFHeader());
Assert.assertTrue(mockWriter.headerWritten);
}
@Test
public void testClose() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.close();
Assert.assertTrue(mockWriter.closed);
}
@Test
public void testCloseWithoutClosingUnderlyingWriter() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.close(false);
Assert.assertFalse(mockWriter.closed);
}
@@ -164,7 +164,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCloseEmitsLastVariant() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 0);
@@ -176,7 +176,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCloseDoesntEmitsLastVariantWhenNonRef() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@@ -188,7 +188,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryRef() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@@ -204,7 +204,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryToLowerPositionsRef() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 30, 30));
writer.add(makeHomRef("20", 31, 30));
@@ -220,7 +220,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 20, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@@ -235,7 +235,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryNonRef() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@@ -248,7 +248,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryNonRefThenNonRef() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@@ -283,7 +283,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testVariantForcesNonRef() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@@ -300,7 +300,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testEmittingTwoBands() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@@ -315,7 +315,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testNonContiguousBlocks() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@@ -329,7 +329,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testDeletion() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@@ -347,7 +347,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testHomRefAlt() {
- final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
+ final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, 2);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@@ -383,7 +383,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test(dataProvider = "BandPartitionData")
public void testMyData(final List<Integer> partitions, final boolean expectedGood) {
try {
- GVCFWriter.parsePartitions(partitions);
+ GVCFWriter.parsePartitions(partitions,2);
Assert.assertTrue(expectedGood, "Expected to fail but didn't");
} catch ( Exception e ) {
Assert.assertTrue(! expectedGood, "Expected to succeed but failed with message " + e.getMessage());
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java
index 36d3aa09e..00d5d6984 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java
@@ -46,11 +46,11 @@
package org.broadinstitute.gatk.utils.gvcf;
-import org.broadinstitute.gatk.utils.BaseTest;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.broadinstitute.gatk.utils.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@@ -58,7 +58,6 @@ import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
public class HomRefBlockUnitTest extends BaseTest {
@@ -71,7 +70,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBasicConstruction() {
- final HomRefBlock band = new HomRefBlock(vc, 10, 20);
+ final HomRefBlock band = new HomRefBlock(vc, 10, 20, 2);
Assert.assertSame(band.getStartingVC(), vc);
Assert.assertEquals(band.getRef(), vc.getReference());
Assert.assertEquals(band.getGQLowerBound(), 10);
@@ -86,7 +85,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testMinMedian() {
//TODO - might be better to make this test use a data provider?
- final HomRefBlock band = new HomRefBlock(vc, 10, 20);
+ final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
int pos = vc.getStart();
@@ -117,7 +116,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBigGQIsCapped() {
- final HomRefBlock band = new HomRefBlock(vc, 10, 20);
+ final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
band.add(vc.getStart(), gb.DP(1000).GQ(1000).PL(new int[]{0,10,100}).make());
@@ -126,7 +125,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBadAdd() {
- final HomRefBlock band = new HomRefBlock(vc, 10, 20);
+ final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
band.add(vc.getStart() + 10, gb.DP(10).GQ(11).PL(new int[]{0,10,100}).make());
@@ -156,7 +155,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test(dataProvider = "ContiguousData")
public void testIsContiguous(final String contig, final int pos, final boolean expected) {
- final HomRefBlock band = new HomRefBlock(vc, 10, 20);
+ final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final VariantContext testVC = new VariantContextBuilder(vc).chr(contig).start(pos).stop(pos).make();
Assert.assertEquals(band.isContiguous(testVC), expected);
}
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
index 8e5ae61fb..feb41712b 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java
@@ -26,14 +26,13 @@
package org.broadinstitute.gatk.engine;
import com.google.java.contract.Ensures;
-import htsjdk.samtools.reference.IndexedFastaSequenceFile;
-import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.reference.IndexedFastaSequenceFile;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.variant.vcf.VCFConstants;
import org.apache.log4j.Logger;
-import org.broadinstitute.gatk.engine.walkers.*;
-import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.ValidationExclusion;
import org.broadinstitute.gatk.engine.datasources.reads.*;
@@ -55,8 +54,12 @@ import org.broadinstitute.gatk.engine.refdata.utils.RMDTriplet;
import org.broadinstitute.gatk.engine.resourcemanagement.ThreadAllocation;
import org.broadinstitute.gatk.engine.samples.SampleDB;
import org.broadinstitute.gatk.engine.samples.SampleDBBuilder;
+import org.broadinstitute.gatk.engine.walkers.*;
+import org.broadinstitute.gatk.genotyping.IndexedSampleList;
+import org.broadinstitute.gatk.genotyping.SampleList;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
+import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
@@ -64,7 +67,6 @@ import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter;
import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet;
import org.broadinstitute.gatk.utils.text.XReadLines;
import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor;
-import htsjdk.variant.vcf.VCFConstants;
import java.io.File;
import java.io.FileNotFoundException;
@@ -1138,7 +1140,7 @@ public class GenomeAnalysisEngine {
* Returns data source objects encapsulating all rod data;
* individual rods can be accessed through the returned data source objects.
*
- * @return the rods data sources
+ * @return the rods data sources, never {@code null}.
*/
public List<ReferenceOrderedDataSource> getRodDataSources() {
return this.rodDataSources;
@@ -1254,4 +1256,20 @@ public class GenomeAnalysisEngine {
runtimeLimitInNanoseconds = TimeUnit.NANOSECONDS.convert(args.maxRuntime, args.maxRuntimeUnits);
}
}
+
+ /**
+ * Returns the sample list including all samples.
+ * @return never {@code null}.
+ */
+ public SampleList getSampleList() {
+ return new IndexedSampleList(getSampleDB().getSampleNames());
+ }
+
+ /**
+ * Returns the sample list including samples in read inputs.
+ * @return never {@code null}.
+ */
+ public SampleList getReadSampleList() {
+ return new IndexedSampleList(SampleUtils.getSAMFileSamples(getSAMFileHeader()));
+ }
}
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/AlleleListUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/AlleleListUtils.java
index 380a20379..dfdfe0945 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/AlleleListUtils.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/AlleleListUtils.java
@@ -37,6 +37,24 @@ import java.util.List;
*/
public class AlleleListUtils {
+ @SuppressWarnings("unchecked")
+ private static final AlleleList EMPTY_LIST = new AlleleList() {
+ @Override
+ public int alleleCount() {
+ return 0;
+ }
+
+ @Override
+ public int alleleIndex(final Allele allele) {
+ return -1;
+ }
+
+ @Override
+ public Allele alleleAt(final int index) {
+ throw new IllegalArgumentException("allele index is out of range");
+ }
+ };
+
/**
* Checks whether two allele lists are in fact the same.
* @param first one list to compare.
@@ -107,6 +125,16 @@ public class AlleleListUtils {
return new AsList(list);
}
+ /**
+ * Returns an unmodifiable empty allele-list.
+ * @param <A> the allele class.
+ * @return never {@code null}.
+ */
+ @SuppressWarnings("unchecked")
+ public static final <A extends Allele> AlleleList<A> emptyList() {
+ return EMPTY_LIST;
+ }
+
/**
* Simple list view of a sample-list.
*/
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/SampleListUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/SampleListUtils.java
index e151d03da..f3787568a 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/SampleListUtils.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/genotyping/SampleListUtils.java
@@ -34,6 +34,33 @@ import java.util.*;
*/
public class SampleListUtils {
+ private static final SampleList EMPTY_LIST = new SampleList() {
+
+ @Override
+ public int sampleCount() {
+ return 0;
+ }
+
+ @Override
+ public int sampleIndex(String sample) {
+ return -1;
+ }
+
+ @Override
+ public String sampleAt(final int sampleIndex) {
+ throw new IllegalArgumentException("index is out of valid range");
+ }
+ };
+
+ /**
+ * Empty list.
+ *
+ * @return never {@code null}
+ */
+ public static SampleList emptyList() {
+ return EMPTY_LIST;
+ }
+
/**
* Checks whether two sample lists are in fact the same.
* @param first one list to compare.
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/SampleUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/SampleUtils.java
index ccb573414..77fc17083 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/SampleUtils.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/SampleUtils.java
@@ -60,9 +60,9 @@ public class SampleUtils {
* @param header the sam file header
* @return list of strings representing the sample names
*/
- public static Set<String> getSAMFileSamples(SAMFileHeader header) {
+ public static Set<String> getSAMFileSamples(final SAMFileHeader header) {
// get all of the unique sample names
- Set<String> samples = new TreeSet<String>();
+ final Set<String> samples = new TreeSet<>();
List<SAMReadGroupRecord> readGroups = header.getReadGroups();
for ( SAMReadGroupRecord readGroup : readGroups )
samples.add(readGroup.getSample());
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
index 97cbebd09..2fdf657cc 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java
@@ -85,6 +85,24 @@ public class GATKVariantContextUtils {
return true;
}
+ /**
+ * Returns a homozygous call allele list given the only allele and the ploidy.
+ *
+ * @param allele the only allele in the allele list.
+ * @param ploidy the ploidy of the resulting allele list.
+ *
+ * @throws IllegalArgumentException if {@code allele} is {@code null} or ploidy is negative.
+ *
+ * @return never {@code null}.
+ */
+ public static List<Allele> homozygousAlleleList(final Allele allele, final int ploidy) {
+ if (allele == null || ploidy < 0)
+ throw new IllegalArgumentException();
+
+ // Use a tailored inner class to implement the list:
+ return Collections.nCopies(ploidy,allele);
+ }
+
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.