From c16f9f2f15876e1c4385773e28738d1e2e3b61ea Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 19 Jul 2012 10:24:08 -0400 Subject: [PATCH] a) Use new method to check for GATK Lite, b) minor improvements to indel pool caller (more to come): brain-dead, quick way to limit number of alt alleles to genotype. We can't process too many alt alleles because of the combinatorial explosion of GL values with high ploidy, and some STR validation targets had up to 12 alt alleles, resulting in GL vectors of > 1e8 elements. Can't use pileup elements since typically not many alleles will be in one pileup, and different alleles will appear in different samples; a nicer solution is TBD. c) Commit to posterity a Scala script for large-scale validation calling, still a work in progress --- .../PoolGenotypeLikelihoodsCalculationModel.java | 2 +- .../PoolIndelGenotypeLikelihoodsCalculationModel.java | 6 +++++- .../genotyper/GenotypeLikelihoodsCalculationModel.java | 2 +- .../sting/gatk/walkers/genotyper/UnifiedGenotyper.java | 9 ++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java index 8b5639817..37b676601 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java @@ -213,7 +213,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) { AlignmentContext mergedContext = AlignmentContextUtils.joinContexts(contexts.values()); Map newContext = new HashMap(); - newContext.put(DUMMY_POOL,mergedContext); + newContext.put(DUMMY_SAMPLE_NAME,mergedContext); contexts = newContext; } diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java index 0922b8e7f..c2bac4455 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel { + private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4; private PairHMMIndelErrorModel pairModel; private boolean allelesArePadded = false; @@ -94,7 +95,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi final Pair,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); - final List alleles = pair.first; + List alleles = pair.first; + + if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE) + alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE); allelesArePadded = pair.second; if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 84467085b..4253ff3ad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -48,7 +48,7 @@ import java.util.Map; public 
abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { public static final String DUMMY_LANE = "Lane1"; - public static final String DUMMY_POOL = "Pool1"; + public static final String DUMMY_SAMPLE_NAME = "DummySample1"; /* public enum Model { SNP, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 31a2dfd77..d504badab 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,10 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.DownsampleType; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -40,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -226,7 +225,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif public void initialize() { // Check for protected modes - if (getToolkit().isGATKLite()) { + if (GATKLiteUtils.isGATKLite()) { // no polyploid/pooled mode in GATK Like if (UAC.samplePloidy != 
VariantContextUtils.DEFAULT_PLOIDY || UAC.referenceSampleName != null || @@ -240,7 +239,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif // in full mode: check for consistency in ploidy/pool calling arguments // check for correct calculation models if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { - // polyploidy required POOL GL and AF calculation models to be specified right now + // polyploidy requires POOL GL and AF calculation models to be specified right now if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) { throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2"); @@ -253,7 +252,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif // get all of the unique sample names if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) { samples.clear(); - samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_POOL); + samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME); } else { samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); if (UAC.referenceSampleName != null )