a) Use new method to check for GATK Lite. b) Minor improvements to the indel pool caller (more to come): a brain-dead, quick way to limit the number of alt alleles to genotype. We can't process too many alt alleles because of the combinatorial explosion of GL values at high ploidy, and some STR validation targets had up to 12 alt alleles, resulting in GL vectors of > 1e8 elements. We can't use pileup elements to pick the alleles, since typically not many alleles will be in one pileup and different alleles will appear in different samples; a nicer solution is TBD. c) Commit to posterity the Scala script for large-scale validation calling; still a work in progress.

This commit is contained in:
Guillermo del Angel 2012-07-19 10:24:08 -04:00
parent 5f5edeca63
commit c16f9f2f15
4 changed files with 11 additions and 8 deletions

View File

@ -213,7 +213,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
AlignmentContext mergedContext = AlignmentContextUtils.joinContexts(contexts.values());
Map<String,AlignmentContext> newContext = new HashMap<String,AlignmentContext>();
newContext.put(DUMMY_POOL,mergedContext);
newContext.put(DUMMY_SAMPLE_NAME,mergedContext);
contexts = newContext;
}

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded = false;
@ -94,7 +95,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
final List<Allele> alleles = pair.first;
List<Allele> alleles = pair.first;
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
allelesArePadded = pair.second;
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();

View File

@ -48,7 +48,7 @@ import java.util.Map;
public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public static final String DUMMY_LANE = "Lane1";
public static final String DUMMY_POOL = "Pool1";
public static final String DUMMY_SAMPLE_NAME = "DummySample1";
/* public enum Model {
SNP,

View File

@ -25,10 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -40,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -226,7 +225,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
public void initialize() {
// Check for protected modes
if (getToolkit().isGATKLite()) {
if (GATKLiteUtils.isGATKLite()) {
// no polyploid/pooled mode in GATK Lite
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ||
UAC.referenceSampleName != null ||
@ -240,7 +239,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy required POOL GL and AF calculation models to be specified right now
// polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2");
@ -253,7 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear();
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_POOL);
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
} else {
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
if (UAC.referenceSampleName != null )