Second intermediate commit for indel pool caller - now works (more or less) in reference sample-free mode. Still needs a lot of cleanups/add more tests and not done w/refactoring quite yet

This commit is contained in:
Guillermo del Angel 2012-06-13 20:59:17 -04:00
parent 67c0569f9c
commit 92669a0468
1 changed files with 24 additions and 19 deletions

View File

@ -30,12 +30,14 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -48,7 +50,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false; private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel; private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap = private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() { new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() { protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
@ -87,6 +90,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED); private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
public VariantContext getLikelihoods(final RefMetaDataTracker tracker, public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> contexts, final Map<String, AlignmentContext> contexts,
@ -95,28 +99,30 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final boolean useBAQedPileup, final boolean useBAQedPileup,
final GenomeLocParser locParser) { final GenomeLocParser locParser) {
if (tracker == null)
return null;
GenomeLoc loc = ref.getLocus(); GenomeLoc loc = ref.getLocus();
if (!ref.getLocus().equals(lastSiteVisited)) { // if (!ref.getLocus().equals(lastSiteVisited)) {
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
// starting a new site: clear allele list // starting a new site: clear allele list
lastSiteVisited = ref.getLocus(); lastSiteVisited = ref.getLocus();
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>()); indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
haplotypeMap.clear(); haplotypeMap.clear();
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
alleleList = pair.first;
allelesArePadded = pair.second;
if (alleleList.isEmpty()) if (alleleList.isEmpty())
return null; return null;
} }
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap);
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
if (haplotypeMap == null || haplotypeMap.isEmpty()) if (haplotypeMap == null || haplotypeMap.isEmpty())
return null; return null;
// start making the VariantContext // start making the VariantContext
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base. // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
final int endLoc = computeEndLocation(alleleList, loc); final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
final int eventLength = getEventLength(alleleList); final int eventLength = getEventLength(alleleList);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
@ -160,14 +166,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get(); return indelLikelihoodMap.get();
} }
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc) { public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
Allele refAllele = alleles.get(0); Allele refAllele = alleles.get(0);
boolean allelesArePadded = true;
int endLoc = loc.getStart() + refAllele.length()-1; int endLoc = loc.getStart() + refAllele.length()-1;
/* if (refAllele.getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
*/
// todo FIXME!
if (allelesArePadded) if (allelesArePadded)
endLoc++; endLoc++;
@ -215,7 +216,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker, public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> contexts, final Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType, final AlignmentContextUtils.ReadOrientation contextType,
@ -224,6 +225,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final boolean ignoreSNPAllelesWhenGenotypingIndels) { final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
boolean allelesArePadded = true;
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null; VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@ -234,10 +236,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
break; break;
} }
} }
// ignore places where we don't have a variant // ignore places where we don't have a variant
if (vc == null) if (vc == null)
return alleles; return new Pair<List<Allele>,Boolean>(alleles,false);
if (ignoreSNPAllelesWhenGenotypingIndels) { if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@ -250,11 +251,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} else { } else {
alleles.addAll(vc.getAlleles()); alleles.addAll(vc.getAlleles());
} }
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
} else { } else {
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
} }
return alleles; return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
} }
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup, // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,