a) Use new method to check for GATK Lite, b) minor improvements to indel pool caller (more to come): brain-dead, quick way to limit the number of alt alleles to genotype. We can't process too many alt alleles because of the combinatorial explosion of GL values with high ploidy, and some STR validation targets had up to 12 alt alleles, resulting in GL vectors of > 1e8 elements. Can't use pileup elements since typically not many alleles will be in one pileup, and different alleles will appear in different samples; TBD a nicer solution. c) Commit to posterity Scala script for large-scale validation calling, still a work in progress
This commit is contained in:
parent
5f5edeca63
commit
c16f9f2f15
|
|
@ -213,7 +213,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
|
||||
AlignmentContext mergedContext = AlignmentContextUtils.joinContexts(contexts.values());
|
||||
Map<String,AlignmentContext> newContext = new HashMap<String,AlignmentContext>();
|
||||
newContext.put(DUMMY_POOL,mergedContext);
|
||||
newContext.put(DUMMY_SAMPLE_NAME,mergedContext);
|
||||
contexts = newContext;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.util.*;
|
||||
|
||||
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
|
||||
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
|
||||
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
private boolean allelesArePadded = false;
|
||||
|
|
@ -94,7 +95,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
|
||||
|
||||
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
||||
final List<Allele> alleles = pair.first;
|
||||
List<Allele> alleles = pair.first;
|
||||
|
||||
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
||||
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
||||
allelesArePadded = pair.second;
|
||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ import java.util.Map;
|
|||
public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
||||
|
||||
public static final String DUMMY_LANE = "Lane1";
|
||||
public static final String DUMMY_POOL = "Pool1";
|
||||
public static final String DUMMY_SAMPLE_NAME = "DummySample1";
|
||||
|
||||
/* public enum Model {
|
||||
SNP,
|
||||
|
|
|
|||
|
|
@ -25,10 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -40,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -226,7 +225,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
public void initialize() {
|
||||
|
||||
// Check for protected modes
|
||||
if (getToolkit().isGATKLite()) {
|
||||
if (GATKLiteUtils.isGATKLite()) {
|
||||
// no polyploid/pooled mode in GATK Lite
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ||
|
||||
UAC.referenceSampleName != null ||
|
||||
|
|
@ -240,7 +239,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
// in full mode: check for consistency in ploidy/pool calling arguments
|
||||
// check for correct calculation models
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
// polyploidy required POOL GL and AF calculation models to be specified right now
|
||||
// polyploidy requires POOL GL and AF calculation models to be specified right now
|
||||
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
|
||||
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
|
||||
throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2");
|
||||
|
|
@ -253,7 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
// get all of the unique sample names
|
||||
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
|
||||
samples.clear();
|
||||
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_POOL);
|
||||
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
|
||||
} else {
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
if (UAC.referenceSampleName != null )
|
||||
|
|
|
|||
Loading…
Reference in New Issue